264 return operand;
265 }
266
267 // Buffer for 128-bits masks used by SSE instructions.
// Holds four 128-bit constants plus one spare 128-bit slot; the spare slot
// lets double_quadword() return a 16-byte-aligned address inside the buffer
// (NOTE(review): alignment role inferred from the "+1" and the comment below;
// double_quadword's definition is above this chunk -- confirm).
268 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
269
270 // Static initialization during VM startup.
// Packed-IEEE bit patterns used by the Abs*/Neg* enc_classes later in this
// file: signmask clears the sign bit (abs), signflip toggles it (negate),
// in float (2x32-bit per qword) and double (64-bit) lane layouts.
271 static jlong *float_signmask_pool = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
272 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
273 static jlong *float_signflip_pool = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
274 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
275
276 // Offset hacking within calls.
277 static int pre_call_FPU_size() {
278 if (Compile::current()->in_24_bit_fp_mode())
279 return 6; // fldcw
280 return 0;
281 }
282
// Size in bytes of the reg-reg move that preserves SP for a method-handle
// invoke: optional REX prefix on 64-bit, then opcode + ModRM byte.
283 static int preserve_SP_size() {
284 return LP64_ONLY(1 +) 2; // [rex,] op, rm(reg/reg)
285 }
286
287 // !!!!! Special hack to get all type of calls to specify the byte offset
288 // from the start of the call to the point where the return address
289 // will point.
// 5 = 1-byte CALL opcode + 4-byte rel32 displacement; a method-handle invoke
// additionally emits the SP-preserving move counted by preserve_SP_size().
290 int MachCallStaticJavaNode::ret_addr_offset() {
291 int offset = 5 + pre_call_FPU_size(); // 5 bytes from start of call to where return address points
292 if (_method_handle_invoke)
293 offset += preserve_SP_size();
294 return offset;
295 }
296
// Dynamic (inline-cache) call: 10 bytes covers the MOV of the cached oop plus
// the CALL itself, optionally preceded by the 24-bit-mode FLDCW.
297 int MachCallDynamicJavaNode::ret_addr_offset() {
298 return 10 + pre_call_FPU_size(); // 10 bytes from start of call to where return address points
299 }
300
// Byte size of the FFree_Float_Stack_All sequence; -1 until it is first
// emitted (asserted below in MachCallRuntimeNode::ret_addr_offset).
301 static int sizeof_FFree_Float_Stack_All = -1;
302
303 int MachCallRuntimeNode::ret_addr_offset() {
304 assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
478 if ( displace_is_oop ) {
479 emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
480 } else {
481 emit_d32 (cbuf, displace);
482 }
483 }
484 }
485 }
486 }
487
488
489 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
490 if( dst_encoding == src_encoding ) {
491 // reg-reg copy, use an empty encoding
492 } else {
493 emit_opcode( cbuf, 0x8B );
494 emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
495 }
496 }
497
// Emit an XMM register-to-register copy via MOVDQA.  Mirrors encode_Copy
// above: a copy of a register onto itself emits nothing.
498 void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
499 if( dst_encoding == src_encoding ) {
500 // reg-reg copy, use an empty encoding
501 } else {
502 MacroAssembler _masm(&cbuf);
503
504 __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding));
505 }
506 }
507
508
509 //=============================================================================
// The constant table is reached with absolute addressing on x86_32 (see
// calculate_table_base_offset below), so this node defines no register.
510 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
511
// Offset of the table base from the address materialized for it; with
// absolute addressing the base is the table itself, so the offset is zero.
512 int Compile::ConstantTable::calculate_table_base_offset() const {
513 return 0; // absolute addressing, no offset
514 }
515
// Nothing to emit: constants are addressed absolutely, so no base-register
// setup instruction is required on this platform.
516 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
517 // Empty encoding
518 }
519
// Matches emit() above: the node emits no bytes.
520 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
521 return 0;
522 }
523
524 #ifndef PRODUCT
525 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
775 emit_opcode (*cbuf, opcode );
776 encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, false);
777 #ifndef PRODUCT
778 } else if( !do_size ) {
779 if( size != 0 ) st->print("\n\t");
780 if( opcode == 0x8B || opcode == 0x89 ) { // MOV
781 if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
782 else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
783 } else { // FLD, FST, PUSH, POP
784 st->print("%s [ESP + #%d]",op_str,offset);
785 }
786 #endif
787 }
788 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
789 return size+3+offset_size;
790 }
791
792 // Helper for XMM registers. Extra opcode bits, limited syntax.
// Emits (or sizes, or prints) an XMM load/store to/from an [ESP+offset] stack
// slot.  A register pair (reg_lo+1 == reg_hi) is a 64-bit double move
// (MOVSD / MOVLPD depending on UseXmmLoadAndClearUpper); otherwise a 32-bit
// MOVSS.  With cbuf==NULL and do_size==false it only formats to 'st'; in all
// cases it returns the accumulated size: prefix+0F+opcode+ModRM+SIB = 5 bytes
// plus 0/1/4 displacement bytes.
793 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
794 int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
795 if( cbuf ) {
796 if( reg_lo+1 == reg_hi ) { // double move?
797 if( is_load && !UseXmmLoadAndClearUpper )
798 emit_opcode(*cbuf, 0x66 ); // use 'movlpd' for load
799 else
800 emit_opcode(*cbuf, 0xF2 ); // use 'movsd' otherwise
801 } else {
802 emit_opcode(*cbuf, 0xF3 );
803 }
804 emit_opcode(*cbuf, 0x0F );
805 if( reg_lo+1 == reg_hi && is_load && !UseXmmLoadAndClearUpper )
806 emit_opcode(*cbuf, 0x12 ); // use 'movlpd' for load
807 else
808 emit_opcode(*cbuf, is_load ? 0x10 : 0x11 );
809 encode_RegMem(*cbuf, Matcher::_regEncode[reg_lo], ESP_enc, 0x4, 0, offset, false);
810 #ifndef PRODUCT
811 } else if( !do_size ) {
812 if( size != 0 ) st->print("\n\t");
813 if( reg_lo+1 == reg_hi ) { // double move?
814 if( is_load ) st->print("%s %s,[ESP + #%d]",
815 UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
816 Matcher::regName[reg_lo], offset);
817 else st->print("MOVSD [ESP + #%d],%s",
818 offset, Matcher::regName[reg_lo]);
819 } else {
820 if( is_load ) st->print("MOVSS %s,[ESP + #%d]",
821 Matcher::regName[reg_lo], offset);
822 else st->print("MOVSS [ESP + #%d],%s",
823 offset, Matcher::regName[reg_lo]);
824 }
825 #endif
826 }
// disp8 fits offsets 1..127; zero disp needs no byte, larger needs disp32.
827 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
828 return size+5+offset_size;
829 }
830
831
// Emits (or sizes, or prints) an XMM register-to-register move for spills.
// With UseXmmRegToRegMoveAll it uses MOVAPS/MOVAPD (full-register copy,
// 66-prefixed for the double case); otherwise MOVSS/MOVSD.  Register pairs
// (lo+1 == hi on both sides) denote a 64-bit double value.
832 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
833 int src_hi, int dst_hi, int size, outputStream* st ) {
834 if( UseXmmRegToRegMoveAll ) {//Use movaps,movapd to move between xmm registers
835 if( cbuf ) {
836 if( (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ) {
837 emit_opcode(*cbuf, 0x66 );
838 }
839 emit_opcode(*cbuf, 0x0F );
840 emit_opcode(*cbuf, 0x28 );
841 emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] );
842 #ifndef PRODUCT
843 } else if( !do_size ) {
844 if( size != 0 ) st->print("\n\t");
845 if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
846 st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
847 } else {
848 st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
849 }
850 #endif
851 }
// MOVAPD = 66 0F 28 /r (4 bytes); MOVAPS = 0F 28 /r (3 bytes).
852 return size + ((src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 4 : 3);
853 } else {
854 if( cbuf ) {
855 emit_opcode(*cbuf, (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 0xF2 : 0xF3 );
856 emit_opcode(*cbuf, 0x0F );
857 emit_opcode(*cbuf, 0x10 );
858 emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] );
859 #ifndef PRODUCT
860 } else if( !do_size ) {
861 if( size != 0 ) st->print("\n\t");
862 if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
863 st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
864 } else {
865 st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
866 }
867 #endif
868 }
// MOVSD/MOVSS are both prefix + 0F 10 + ModRM = 4 bytes.
869 return size+4;
870 }
871 }
872
// Emits (or prints) a 32-bit GPR -> XMM move (MOVD, 66 0F 6E /r) for spills.
// NOTE(review): unlike the helpers above, this returns a fixed 4 instead of
// size+4 and never prints the "\n\t" separator -- verify callers expect that.
873 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
874 int src_hi, int dst_hi, int size, outputStream* st ) {
875 // 32-bit
876 if (cbuf) {
877 emit_opcode(*cbuf, 0x66);
878 emit_opcode(*cbuf, 0x0F);
879 emit_opcode(*cbuf, 0x6E);
880 emit_rm(*cbuf, 0x3, Matcher::_regEncode[dst_lo] & 7, Matcher::_regEncode[src_lo] & 7);
881 #ifndef PRODUCT
882 } else if (!do_size) {
883 st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
884 #endif
885 }
886 return 4;
887 }
888
889
// Emits (or prints) a 32-bit XMM -> GPR move (MOVD r/m32,xmm = 66 0F 7E /r)
// for spills; the XMM source goes in the reg field, the GPR in r/m.
// NOTE(review): like impl_movgpr2x_helper, returns a fixed 4, not size+4.
890 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
891 int src_hi, int dst_hi, int size, outputStream* st ) {
892 // 32-bit
893 if (cbuf) {
894 emit_opcode(*cbuf, 0x66);
895 emit_opcode(*cbuf, 0x0F);
896 emit_opcode(*cbuf, 0x7E);
897 emit_rm(*cbuf, 0x3, Matcher::_regEncode[src_lo] & 7, Matcher::_regEncode[dst_lo] & 7);
898 #ifndef PRODUCT
899 } else if (!do_size) {
900 st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
901 #endif
902 }
903 return 4;
904 }
905
// Emits (or prints) an integer register-to-register MOV (8B /r, 2 bytes)
// for spills, following the same cbuf / !do_size print protocol as the
// helpers above.
906 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
907 if( cbuf ) {
908 emit_opcode(*cbuf, 0x8B );
909 emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
910 #ifndef PRODUCT
911 } else if( !do_size ) {
912 if( size != 0 ) st->print("\n\t");
913 st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
914 #endif
915 }
916 return size+2;
917 }
1914 // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine
1915 // who we intended to call.
1916 cbuf.set_insts_mark();
1917 $$$emit8$primary;
1918 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1919 virtual_call_Relocation::spec(virtual_call_oop_addr), RELOC_IMM32 );
1920 %}
1921
// Call into compiled Java code through the target method's from-compiled
// entry point: an indirect CALL [EAX+disp8], where EAX holds the methodOop.
// The assert guarantees the offset fits the disp8 addressing form used here.
1922 enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL
1923 int disp = in_bytes(methodOopDesc::from_compiled_offset());
1924 assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
1925
1926 // CALL *[EAX+in_bytes(methodOopDesc::from_compiled_code_entry_point_offset())]
1927 cbuf.set_insts_mark();
1928 $$$emit8$primary;
1929 emit_rm(cbuf, 0x01, $secondary, EAX_enc ); // R/M byte
1930 emit_d8(cbuf, disp); // Displacement
1931
1932 %}
1933
// Zero a register: XOR dst,dst (33 /r with dst in both fields).
1934 enc_class Xor_Reg (eRegI dst) %{
1935 emit_opcode(cbuf, 0x33);
1936 emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
1937 %}
1938
1939 // Following encoding is no longer used, but may be restored if calling
1940 // convention changes significantly.
1941 // Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1942 //
1943 // enc_class Java_Interpreter_Call (label labl) %{ // JAVA INTERPRETER CALL
1944 // // int ic_reg = Matcher::inline_cache_reg();
1945 // // int ic_encode = Matcher::_regEncode[ic_reg];
1946 // // int imo_reg = Matcher::interpreter_method_oop_reg();
1947 // // int imo_encode = Matcher::_regEncode[imo_reg];
1948 //
1949 // // // Interpreter expects method_oop in EBX, currently a callee-saved register,
1950 // // // so we load it immediately before the call
1951 // // emit_opcode(cbuf, 0x8B); // MOV imo_reg,ic_reg # method_oop
1952 // // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1953 //
1954 // // xor rbp,ebp
1955 // emit_opcode(cbuf, 0x33);
1956 // emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1957 //
1958 // // CALL to interpreter.
1996 emit_d32(cbuf, src_con);
1997 }
1998 %}
1999
// Load the high 32 bits of a long immediate into the high half of the long
// register pair.  The "+2" presumably maps the low-half encoding to the
// high-half register (same convention as HIGH_FROM_LOW) -- confirm.
2000 enc_class LdImmL_Hi( eRegL dst, immL src) %{ // Load Immediate
2001 // Load immediate does not have a zero or sign extended version
2002 // for 8-bit immediates
2003 int dst_enc = $dst$$reg + 2;
2004 int src_con = ((julong)($src$$constant)) >> 32;
2005 if (src_con == 0) {
2006 // xor dst, dst
2007 emit_opcode(cbuf, 0x33);
2008 emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2009 } else {
// $primary + reg encodes the MOV r32,imm32 short form (B8+rd style).
2010 emit_opcode(cbuf, $primary + dst_enc);
2011 emit_d32(cbuf, src_con);
2012 }
2013 %}
2014
2015
// Move a 32-bit GPR into an XMM register: MOVD xmm,r32 (66 0F 6E /r).
2016 enc_class MovI2X_reg(regX dst, eRegI src) %{
2017 emit_opcode(cbuf, 0x66 ); // MOVD dst,src
2018 emit_opcode(cbuf, 0x0F );
2019 emit_opcode(cbuf, 0x6E );
2020 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2021 %}
2022
// Move an XMM register into a 32-bit GPR: MOVD r32,xmm (66 0F 7E /r).
// Note the operand order in the ModRM: the XMM source is the reg field.
2023 enc_class MovX2I_reg(eRegI dst, regX src) %{
2024 emit_opcode(cbuf, 0x66 ); // MOVD dst,src
2025 emit_opcode(cbuf, 0x0F );
2026 emit_opcode(cbuf, 0x7E );
2027 emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg);
2028 %}
2029
// Assemble a 64-bit long (in a GPR pair) into one XMM register: MOVD the low
// and high halves into dst and tmp, then interleave with PUNPCKLDQ so dst
// holds hi:lo as a single 64-bit lane.
2030 enc_class MovL2XD_reg(regXD dst, eRegL src, regXD tmp) %{
2031 { // MOVD $dst,$src.lo
2032 emit_opcode(cbuf,0x66);
2033 emit_opcode(cbuf,0x0F);
2034 emit_opcode(cbuf,0x6E);
2035 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2036 }
2037 { // MOVD $tmp,$src.hi
2038 emit_opcode(cbuf,0x66);
2039 emit_opcode(cbuf,0x0F);
2040 emit_opcode(cbuf,0x6E);
2041 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
2042 }
2043 { // PUNPCKLDQ $dst,$tmp
2044 emit_opcode(cbuf,0x66);
2045 emit_opcode(cbuf,0x0F);
2046 emit_opcode(cbuf,0x62);
2047 emit_rm(cbuf, 0x3, $dst$$reg, $tmp$$reg);
2048 }
2049 %}
2050
// Split a 64-bit value in an XMM register into a GPR pair: MOVD the low word
// out, swap the XMM's 16-bit words with PSHUFLW(0x4E) so the upper 32 bits
// move into the low lane of tmp, then MOVD that into the high half of dst.
2051 enc_class MovXD2L_reg(eRegL dst, regXD src, regXD tmp) %{
2052 { // MOVD $dst.lo,$src
2053 emit_opcode(cbuf,0x66);
2054 emit_opcode(cbuf,0x0F);
2055 emit_opcode(cbuf,0x7E);
2056 emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg);
2057 }
2058 { // PSHUFLW $tmp,$src,0x4E (01001110b)
2059 emit_opcode(cbuf,0xF2);
2060 emit_opcode(cbuf,0x0F);
2061 emit_opcode(cbuf,0x70);
2062 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
2063 emit_d8(cbuf, 0x4E);
2064 }
2065 { // MOVD $dst.hi,$tmp
2066 emit_opcode(cbuf,0x66);
2067 emit_opcode(cbuf,0x0F);
2068 emit_opcode(cbuf,0x7E);
2069 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg));
2070 }
2071 %}
2072
2073
2074 // Encode a reg-reg copy. If it is useless, then empty encoding.
// Thin ADL wrapper over the encode_Copy() helper defined earlier.
2075 enc_class enc_Copy( eRegI dst, eRegI src ) %{
2076 encode_Copy( cbuf, $dst$$reg, $src$$reg );
2077 %}
2078
// Copy only the low 32-bit half of a long source into an int register.
2079 enc_class enc_CopyL_Lo( eRegI dst, eRegL src ) %{
2080 encode_Copy( cbuf, $dst$$reg, $src$$reg );
2081 %}
2082
2083 // Encode xmm reg-reg copy. If it is useless, then empty encoding.
// Thin ADL wrapper over the encode_CopyXD() helper defined earlier.
2084 enc_class enc_CopyXD( RegXD dst, RegXD src ) %{
2085 encode_CopyXD( cbuf, $dst$$reg, $src$$reg );
2086 %}
2087
// Emit just the reg-reg ModRM byte; the opcode comes from the instruction.
2088 enc_class RegReg (eRegI dst, eRegI src) %{ // RegReg(Many)
2089 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2090 %}
2091
// Low halves of a long pair: primary opcode then reg-reg ModRM.
2092 enc_class RegReg_Lo(eRegL dst, eRegL src) %{ // RegReg(Many)
2093 $$$emit8$primary;
2094 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2095 %}
2096
// High halves of a long pair: secondary opcode then ModRM on the hi regs.
2097 enc_class RegReg_Hi(eRegL dst, eRegL src) %{ // RegReg(Many)
2098 $$$emit8$secondary;
2099 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2100 %}
2101
// Low-half ModRM only -- for instructions that emit their opcode elsewhere.
2102 enc_class RegReg_Lo2(eRegL dst, eRegL src) %{ // RegReg(Many)
2103 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2104 %}
2105
2106 enc_class RegReg_Hi2(eRegL dst, eRegL src) %{ // RegReg(Many)
2107 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2617
2618
// Push dst onto the x87 stack (FLD ST(i)), then -- when src is not already
// FPR1 -- rotate the stack with FINCSTP/FXCH/FDECSTP so src ends up where the
// following modify-op expects it.  Exact stack choreography is delicate;
// behavior preserved as-is.
2619 enc_class Push_Reg_Mod_D( regD dst, regD src) %{
2620 // load dst in FPR0
2621 emit_opcode( cbuf, 0xD9 );
2622 emit_d8( cbuf, 0xC0-1+$dst$$reg );
2623 if ($src$$reg != FPR1L_enc) {
2624 // fincstp
2625 emit_opcode (cbuf, 0xD9);
2626 emit_opcode (cbuf, 0xF7);
2627 // swap src with FPR1:
2628 // FXCH FPR1 with src
2629 emit_opcode(cbuf, 0xD9);
2630 emit_d8(cbuf, 0xC8-1+$src$$reg );
2631 // fdecstp
2632 emit_opcode (cbuf, 0xD9);
2633 emit_opcode (cbuf, 0xF6);
2634 }
2635 %}
2636
// Transfer two XMM doubles onto the x87 stack through an 8-byte stack slot:
// for each source, MOVSD it to [ESP] then FLD_D it.  src1 is pushed first so
// src0 ends up on top (FPR0).  The 8 bytes remain allocated on exit.
2637 enc_class Push_ModD_encoding( regXD src0, regXD src1) %{
2638 // Allocate a word
2639 emit_opcode(cbuf,0x83); // SUB ESP,8
2640 emit_opcode(cbuf,0xEC);
2641 emit_d8(cbuf,0x08);
2642
2643 emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src1
2644 emit_opcode (cbuf, 0x0F );
2645 emit_opcode (cbuf, 0x11 );
2646 encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false);
2647
2648 emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
2649 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
2650
2651 emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src0
2652 emit_opcode (cbuf, 0x0F );
2653 emit_opcode (cbuf, 0x11 );
2654 encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false);
2655
2656 emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
2657 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
2658
2659 %}
2660
// Float version of Push_ModD_encoding: bounce two XMM singles through a
// 4-byte stack slot (MOVSS + FLD_S each), src1 first so src0 is on top.
// The 4 bytes remain allocated on exit.
2661 enc_class Push_ModX_encoding( regX src0, regX src1) %{
2662 // Allocate a word
2663 emit_opcode(cbuf,0x83); // SUB ESP,4
2664 emit_opcode(cbuf,0xEC);
2665 emit_d8(cbuf,0x04);
2666
2667 emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src1
2668 emit_opcode (cbuf, 0x0F );
2669 emit_opcode (cbuf, 0x11 );
2670 encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false);
2671
2672 emit_opcode(cbuf,0xD9 ); // FLD [ESP]
2673 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
2674
2675 emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src0
2676 emit_opcode (cbuf, 0x0F );
2677 emit_opcode (cbuf, 0x11 );
2678 encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false);
2679
2680 emit_opcode(cbuf,0xD9 ); // FLD [ESP]
2681 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
2682
2683 %}
2684
// Move an x87 result into an XMM double: FSTP it to [ESP], load it back with
// MOVSD (or MOVLPD when !UseXmmLoadAndClearUpper), then free the 8-byte slot.
2685 enc_class Push_ResultXD(regXD dst) %{
2686 store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [ESP]
2687
2688 // UseXmmLoadAndClearUpper ? movsd dst,[esp] : movlpd dst,[esp]
2689 emit_opcode (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
2690 emit_opcode (cbuf, 0x0F );
2691 emit_opcode (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
2692 encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);
2693
2694 emit_opcode(cbuf,0x83); // ADD ESP,8
2695 emit_opcode(cbuf,0xC4);
2696 emit_d8(cbuf,0x08);
2697 %}
2698
// Float version of Push_ResultXD: FSTP_S to [ESP], reload with MOVSS, then
// release d8 bytes of stack (4 or 8, matching what the caller allocated).
2699 enc_class Push_ResultX(regX dst, immI d8) %{
2700 store_to_stackslot( cbuf, 0xD9, 0x03, 0 ); //FSTP_S [ESP]
2701
2702 emit_opcode (cbuf, 0xF3 ); // MOVSS dst(xmm), [ESP]
2703 emit_opcode (cbuf, 0x0F );
2704 emit_opcode (cbuf, 0x10 );
2705 encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);
2706
2707 emit_opcode(cbuf,0x83); // ADD ESP,d8 (4 or 8)
2708 emit_opcode(cbuf,0xC4);
2709 emit_d8(cbuf,$d8$$constant);
2710 %}
2711
// Push one XMM double onto the x87 stack via an 8-byte stack slot
// (MOVSD to [ESP], then FLD_D).  The slot stays allocated on exit;
// pair with pop_stack_temp_qword / Push_ResultXD to release it.
2712 enc_class Push_SrcXD(regXD src) %{
2713 // Allocate a word
2714 emit_opcode(cbuf,0x83); // SUB ESP,8
2715 emit_opcode(cbuf,0xEC);
2716 emit_d8(cbuf,0x08);
2717
2718 emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src
2719 emit_opcode (cbuf, 0x0F );
2720 emit_opcode (cbuf, 0x11 );
2721 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
2722
2723 emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
2724 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
2725 %}
2726
// Reserve an 8-byte scratch slot on the stack (SUB ESP,8).
2727 enc_class push_stack_temp_qword() %{
2728 emit_opcode(cbuf,0x83); // SUB ESP,8
2729 emit_opcode(cbuf,0xEC);
2730 emit_d8 (cbuf,0x08);
2731 %}
2732
// Release the 8-byte scratch slot reserved by push_stack_temp_qword.
2733 enc_class pop_stack_temp_qword() %{
2734 emit_opcode(cbuf,0x83); // ADD ESP,8
2735 emit_opcode(cbuf,0xC4);
2736 emit_d8 (cbuf,0x08);
2737 %}
2738
// Copy an XMM double onto the x87 stack top: MOVSD to the pre-allocated
// [ESP] scratch slot, then FLD_D it (expects push_stack_temp_qword first).
2739 enc_class push_xmm_to_fpr1( regXD xmm_src ) %{
2740 emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], xmm_src
2741 emit_opcode (cbuf, 0x0F );
2742 emit_opcode (cbuf, 0x11 );
2743 encode_RegMem(cbuf, $xmm_src$$reg, ESP_enc, 0x4, 0, 0, false);
2744
2745 emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
2746 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
2747 %}
2748
2749 // Compute X^Y using Intel's fast hardware instructions, if possible.
2750 // Otherwise return a NaN.
2751 enc_class pow_exp_core_encoding %{
2752 // FPR1 holds Y*ln2(X). Compute FPR1 = 2^(Y*ln2(X))
2753 emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xC0); // fdup = fld st(0) Q Q
2754 emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xFC); // frndint int(Q) Q
2755 emit_opcode(cbuf,0xDC); emit_opcode(cbuf,0xE9); // fsub st(1) -= st(0); int(Q) frac(Q)
2756 emit_opcode(cbuf,0xDB); // FISTP [ESP] frac(Q)
2757 emit_opcode(cbuf,0x1C);
2758 emit_d8(cbuf,0x24);
2759 emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xF0); // f2xm1 2^frac(Q)-1
2760 emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xE8); // fld1 1 2^frac(Q)-1
2761 emit_opcode(cbuf,0xDE); emit_opcode(cbuf,0xC1); // faddp 2^frac(Q)
2762 emit_opcode(cbuf,0x8B); // mov rax,[esp+0]=int(Q)
2763 encode_RegMem(cbuf, EAX_enc, ESP_enc, 0x4, 0, 0, false);
2764 emit_opcode(cbuf,0xC7); // mov rcx,0xFFFFF800 - overflow mask
2765 emit_rm(cbuf, 0x3, 0x0, ECX_enc);
2766 emit_d32(cbuf,0xFFFFF800);
2905 emit_opcode( cbuf, 0x7A );
2906 emit_d8 ( cbuf, 0x13 );
2907 // movl(dst, less_result);
2908 emit_opcode( cbuf, 0xB8 + $dst$$reg);
2909 emit_d32( cbuf, -1 );
2910 // jcc(Assembler::below, exit);
2911 emit_opcode( cbuf, 0x72 );
2912 emit_d8 ( cbuf, 0x0C );
2913 // movl(dst, equal_result);
2914 emit_opcode( cbuf, 0xB8 + $dst$$reg);
2915 emit_d32( cbuf, 0 );
2916 // jcc(Assembler::equal, exit);
2917 emit_opcode( cbuf, 0x74 );
2918 emit_d8 ( cbuf, 0x05 );
2919 // movl(dst, greater_result);
2920 emit_opcode( cbuf, 0xB8 + $dst$$reg);
2921 emit_d32( cbuf, 1 );
2922 %}
2923
2924
2925 // XMM version of CmpF_Result. Because the XMM compare
2926 // instructions set the EFLAGS directly. It becomes simpler than
2927 // the float version above.
// Turns EFLAGS from a preceding UCOMISS/UCOMISD into -1/0/+1 in dst.
// NOTE(review): dst appears to be assumed pre-zeroed by the matching
// instruct's other encoding (equal falls straight to done) -- confirm.
2928 enc_class CmpX_Result(eRegI dst) %{
2929 MacroAssembler _masm(&cbuf);
2930 Label nan, inc, done;
2931
// Parity set means unordered (NaN); treated the same as "less" below.
2932 __ jccb(Assembler::parity, nan);
2933 __ jccb(Assembler::equal, done);
2934 __ jccb(Assembler::above, inc);
2935 __ bind(nan);
2936 __ decrement(as_Register($dst$$reg)); // NO L qqq
2937 __ jmpb(done);
2938 __ bind(inc);
2939 __ increment(as_Register($dst$$reg)); // NO L qqq
2940 __ bind(done);
2941 %}
2942
2943 // Compare the longs and set flags
2944 // BROKEN! Do Not use as-is
// Compares high halves first and skips the low compare when they differ;
// flagged broken upstream (signed-comparison semantics are lost), kept only
// for reference.
2945 enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
2946 // CMP $src1.hi,$src2.hi
2947 emit_opcode( cbuf, 0x3B );
2948 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2949 // JNE,s done
2950 emit_opcode(cbuf,0x75);
2951 emit_d8(cbuf, 2 );
2952 // CMP $src1.lo,$src2.lo
2953 emit_opcode( cbuf, 0x3B );
2954 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2955 // done:
2956 %}
2957
2958 enc_class convert_int_long( regL dst, eRegI src ) %{
2959 // mov $dst.lo,$src
2960 int dst_encoding = $dst$$reg;
2961 int src_encoding = $src$$reg;
2962 encode_Copy( cbuf, dst_encoding , src_encoding );
3145 emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
3146 // CMP $tmp,$src.lo
3147 emit_opcode( cbuf, 0x3B );
3148 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
3149 // SBB $tmp,$src.hi
3150 emit_opcode( cbuf, 0x1B );
3151 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
3152 %}
3153
3154 // Sniff, sniff... smells like Gnu Superoptimizer
// Two's-complement negate of a 64-bit register pair without a branch:
// NEG hi; NEG lo; SBB hi,0 folds the borrow from the low half back in.
3155 enc_class neg_long( eRegL dst ) %{
3156 emit_opcode(cbuf,0xF7); // NEG hi
3157 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
3158 emit_opcode(cbuf,0xF7); // NEG lo
3159 emit_rm (cbuf,0x3, 0x3, $dst$$reg );
3160 emit_opcode(cbuf,0x83); // SBB hi,0
3161 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
3162 emit_d8 (cbuf,0 );
3163 %}
3164
// 64-bit load from memory into an XMM register via the assembler.
3165 enc_class movq_ld(regXD dst, memory mem) %{
3166 MacroAssembler _masm(&cbuf);
3167 __ movq($dst$$XMMRegister, $mem$$Address);
3168 %}
3169
// 64-bit store of an XMM register to memory via the assembler.
3170 enc_class movq_st(memory mem, regXD src) %{
3171 MacroAssembler _masm(&cbuf);
3172 __ movq($mem$$Address, $src$$XMMRegister);
3173 %}
3174
// Replicate the low byte of src across the low 8 bytes of dst: copy src to
// dst, widen bytes to words with PUNPCKLBW, then broadcast with PSHUFLW(0).
3175 enc_class pshufd_8x8(regX dst, regX src) %{
3176 MacroAssembler _masm(&cbuf);
3177
3178 encode_CopyXD(cbuf, $dst$$reg, $src$$reg);
3179 __ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg));
3180 __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00);
3181 %}
3182
// Broadcast the low 16-bit word of src into the low 4 words of dst.
3183 enc_class pshufd_4x16(regX dst, regX src) %{
3184 MacroAssembler _masm(&cbuf);
3185
3186 __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00);
3187 %}
3188
// Shuffle 32-bit lanes of src into dst according to the immediate mode byte.
3189 enc_class pshufd(regXD dst, regXD src, int mode) %{
3190 MacroAssembler _masm(&cbuf);
3191
3192 __ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode);
3193 %}
3194
// Bitwise XOR of two XMM registers (dst ^= src); with dst==src this zeroes.
3195 enc_class pxor(regXD dst, regXD src) %{
3196 MacroAssembler _masm(&cbuf);
3197
3198 __ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg));
3199 %}
3200
// Move a 32-bit GPR into an XMM register (MOVD) via the assembler.
3201 enc_class mov_i2x(regXD dst, eRegI src) %{
3202 MacroAssembler _masm(&cbuf);
3203
3204 __ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg));
3205 %}
3206
3207
3208 // Because the transitions from emitted code to the runtime
3209 // monitorenter/exit helper stubs are so slow it's critical that
3210 // we inline both the stack-locking fast-path and the inflated fast path.
3211 //
3212 // See also: cmpFastLock and cmpFastUnlock.
3213 //
3214 // What follows is a specialized inline transliteration of the code
3215 // in slow_enter() and slow_exit(). If we're concerned about I$ bloat
3216 // another option would be to emit TrySlowEnter and TrySlowExit methods
3217 // at startup-time. These methods would accept arguments as
3218 // (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
3219 // indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply
3220 // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
3221 // In practice, however, the # of lock sites is bounded and is usually small.
3222 // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
3223 // if the processor uses simple bimodal branch predictors keyed by EIP
3224 // Since the helper routines would be called from multiple synchronization
3225 // sites.
3226 //
3227 // An even better approach would be write "MonitorEnter()" and "MonitorExit()"
3825 emit_opcode(cbuf,0x5A); // POP EDX
3826 emit_opcode(cbuf,0x81); // CMP EDX,imm
3827 emit_d8 (cbuf,0xFA); // rdx
3828 emit_d32 (cbuf,0x80000000); // 0x80000000
3829 emit_opcode(cbuf,0x75); // JNE around_slow_call
3830 emit_d8 (cbuf,0x07+4); // Size of slow_call
3831 emit_opcode(cbuf,0x85); // TEST EAX,EAX
3832 emit_opcode(cbuf,0xC0); // 2/rax,/rax,
3833 emit_opcode(cbuf,0x75); // JNE around_slow_call
3834 emit_d8 (cbuf,0x07); // Size of slow_call
3835 // Push src onto stack slow-path
3836 emit_opcode(cbuf,0xD9 ); // FLD ST(i)
3837 emit_d8 (cbuf,0xC0-1+$src$$reg );
3838 // CALL directly to the runtime
3839 cbuf.set_insts_mark();
3840 emit_opcode(cbuf,0xE8); // Call into runtime
3841 emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3842 // Carry on here...
3843 %}
3844
// Convert an XMM float to a long: bounce src through the stack onto the x87
// stack, FISTP it as a 64-bit integer under truncating rounding, then if the
// result is the sentinel 0x80000000_00000000 (possible overflow/NaN), redo
// the conversion through the d2l_wrapper runtime stub.  The JNE offsets
// (0x13+4 / 0x13) are the hand-counted byte sizes of the slow path -- keep
// them in sync with any change below.
3845 enc_class X2L_encoding( regX src ) %{
3846 // Allocate a word
3847 emit_opcode(cbuf,0x83); // SUB ESP,8
3848 emit_opcode(cbuf,0xEC);
3849 emit_d8(cbuf,0x08);
3850
3851 emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src
3852 emit_opcode (cbuf, 0x0F );
3853 emit_opcode (cbuf, 0x11 );
3854 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
3855
3856 emit_opcode(cbuf,0xD9 ); // FLD_S [ESP]
3857 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
3858
3859 emit_opcode(cbuf,0xD9); // FLDCW trunc
3860 emit_opcode(cbuf,0x2D);
3861 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3862
3863 // Encoding assumes a double has been pushed into FPR0.
3864 // Store down the double as a long, popping the FPU stack
3865 emit_opcode(cbuf,0xDF); // FISTP [ESP]
3866 emit_opcode(cbuf,0x3C);
3867 emit_d8(cbuf,0x24);
3868
3869 // Restore the rounding mode; mask the exception
3870 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode
3871 emit_opcode(cbuf,0x2D);
3872 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3873 ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3874 : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3875
// Load the converted long into EDX:EAX; adjust CPU stack
3877 emit_opcode(cbuf,0x58); // POP EAX
3878
3879 emit_opcode(cbuf,0x5A); // POP EDX
3880
// Sentinel test: only EDX==0x80000000 && EAX==0 can mean an invalid convert.
3881 emit_opcode(cbuf,0x81); // CMP EDX,imm
3882 emit_d8 (cbuf,0xFA); // rdx
3883 emit_d32 (cbuf,0x80000000);// 0x80000000
3884
3885 emit_opcode(cbuf,0x75); // JNE around_slow_call
3886 emit_d8 (cbuf,0x13+4); // Size of slow_call
3887
3888 emit_opcode(cbuf,0x85); // TEST EAX,EAX
3889 emit_opcode(cbuf,0xC0); // 2/rax,/rax,
3890
3891 emit_opcode(cbuf,0x75); // JNE around_slow_call
3892 emit_d8 (cbuf,0x13); // Size of slow_call
3893
// Slow path: re-push src onto the x87 stack and call the runtime wrapper.
3894 // Allocate a word
3895 emit_opcode(cbuf,0x83); // SUB ESP,4
3896 emit_opcode(cbuf,0xEC);
3897 emit_d8(cbuf,0x04);
3898
3899 emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src
3900 emit_opcode (cbuf, 0x0F );
3901 emit_opcode (cbuf, 0x11 );
3902 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
3903
3904 emit_opcode(cbuf,0xD9 ); // FLD_S [ESP]
3905 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
3906
3907 emit_opcode(cbuf,0x83); // ADD ESP,4
3908 emit_opcode(cbuf,0xC4);
3909 emit_d8(cbuf,0x04);
3910
3911 // CALL directly to the runtime
3912 cbuf.set_insts_mark();
3913 emit_opcode(cbuf,0xE8); // Call into runtime
3914 emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3915 // Carry on here...
3916 %}
3917
// Double flavor of X2L_encoding above: convert an XMM double to a long via
// x87 FISTP with truncation, falling back to the d2l_wrapper runtime stub
// when the sentinel 0x80000000_00000000 result signals possible overflow/NaN.
// The JNE offsets are hand-counted slow-path byte sizes -- keep in sync.
3918 enc_class XD2L_encoding( regXD src ) %{
3919 // Allocate a word
3920 emit_opcode(cbuf,0x83); // SUB ESP,8
3921 emit_opcode(cbuf,0xEC);
3922 emit_d8(cbuf,0x08);
3923
3924 emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src
3925 emit_opcode (cbuf, 0x0F );
3926 emit_opcode (cbuf, 0x11 );
3927 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
3928
3929 emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
3930 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
3931
3932 emit_opcode(cbuf,0xD9); // FLDCW trunc
3933 emit_opcode(cbuf,0x2D);
3934 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3935
3936 // Encoding assumes a double has been pushed into FPR0.
3937 // Store down the double as a long, popping the FPU stack
3938 emit_opcode(cbuf,0xDF); // FISTP [ESP]
3939 emit_opcode(cbuf,0x3C);
3940 emit_d8(cbuf,0x24);
3941
3942 // Restore the rounding mode; mask the exception
3943 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode
3944 emit_opcode(cbuf,0x2D);
3945 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3946 ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3947 : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3948
// Load the converted long into EDX:EAX; adjust CPU stack
3950 emit_opcode(cbuf,0x58); // POP EAX
3951
3952 emit_opcode(cbuf,0x5A); // POP EDX
3953
3954 emit_opcode(cbuf,0x81); // CMP EDX,imm
3955 emit_d8 (cbuf,0xFA); // rdx
3956 emit_d32 (cbuf,0x80000000); // 0x80000000
3957
3958 emit_opcode(cbuf,0x75); // JNE around_slow_call
3959 emit_d8 (cbuf,0x13+4); // Size of slow_call
3960
3961 emit_opcode(cbuf,0x85); // TEST EAX,EAX
3962 emit_opcode(cbuf,0xC0); // 2/rax,/rax,
3963
3964 emit_opcode(cbuf,0x75); // JNE around_slow_call
3965 emit_d8 (cbuf,0x13); // Size of slow_call
3966
3967 // Push src onto stack slow-path
3968 // Allocate a word
3969 emit_opcode(cbuf,0x83); // SUB ESP,8
3970 emit_opcode(cbuf,0xEC);
3971 emit_d8(cbuf,0x08);
3972
3973 emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src
3974 emit_opcode (cbuf, 0x0F );
3975 emit_opcode (cbuf, 0x11 );
3976 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
3977
3978 emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
3979 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
3980
3981 emit_opcode(cbuf,0x83); // ADD ESP,8
3982 emit_opcode(cbuf,0xC4);
3983 emit_d8(cbuf,0x08);
3984
3985 // CALL directly to the runtime
3986 cbuf.set_insts_mark();
3987 emit_opcode(cbuf,0xE8); // Call into runtime
3988 emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3989 // Carry on here...
3990 %}
3991
// Convert an x87 double register to an XMM float through a 4-byte stack
// slot: FLD src when it is not already on top (then FSTP instead of FST),
// store single-precision to [ESP], MOVSS it into dst, and free the slot.
3992 enc_class D2X_encoding( regX dst, regD src ) %{
3993 // Allocate a word
3994 emit_opcode(cbuf,0x83); // SUB ESP,4
3995 emit_opcode(cbuf,0xEC);
3996 emit_d8(cbuf,0x04);
// pop=0x02 encodes FST_S (leave stack), 0x03 FSTP_S (pop the extra FLD).
3997 int pop = 0x02;
3998 if ($src$$reg != FPR1L_enc) {
3999 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1)
4000 emit_d8( cbuf, 0xC0-1+$src$$reg );
4001 pop = 0x03;
4002 }
4003 store_to_stackslot( cbuf, 0xD9, pop, 0 ); // FST<P>_S [ESP]
4004
4005 emit_opcode (cbuf, 0xF3 ); // MOVSS dst(xmm), [ESP]
4006 emit_opcode (cbuf, 0x0F );
4007 emit_opcode (cbuf, 0x10 );
4008 encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);
4009
4010 emit_opcode(cbuf,0x83); // ADD ESP,4
4011 emit_opcode(cbuf,0xC4);
4012 emit_d8(cbuf,0x04);
4013 // Carry on here...
4014 %}
4015
// Finish an XMM float/double -> int conversion.  The leading emit_rm
// completes a CVTT* instruction whose opcode bytes are presumably emitted by
// the instruct's preceding encoding ($primary selects the double variant) --
// confirm against the ins_encode that uses this.  If the result is the
// sentinel 0x80000000, redo the conversion via the d2i_wrapper stub.
4016 enc_class FX2I_encoding( regX src, eRegI dst ) %{
4017 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
4018
4019 // Compare the result to see if we need to go to the slow path
4020 emit_opcode(cbuf,0x81); // CMP dst,imm
4021 emit_rm (cbuf,0x3,0x7,$dst$$reg);
4022 emit_d32 (cbuf,0x80000000); // 0x80000000
4023
4024 emit_opcode(cbuf,0x75); // JNE around_slow_call
4025 emit_d8 (cbuf,0x13); // Size of slow_call
4026 // Store xmm to a temp memory
4027 // location and push it onto stack.
4028
// $primary distinguishes double (8-byte slot, MOVSD/FLD_D) from float.
4029 emit_opcode(cbuf,0x83); // SUB ESP,4
4030 emit_opcode(cbuf,0xEC);
4031 emit_d8(cbuf, $primary ? 0x8 : 0x4);
4032
4033 emit_opcode (cbuf, $primary ? 0xF2 : 0xF3 ); // MOVSS [ESP], xmm
4034 emit_opcode (cbuf, 0x0F );
4035 emit_opcode (cbuf, 0x11 );
4036 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
4037
4038 emit_opcode(cbuf, $primary ? 0xDD : 0xD9 ); // FLD [ESP]
4039 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
4040
4041 emit_opcode(cbuf,0x83); // ADD ESP,4
4042 emit_opcode(cbuf,0xC4);
4043 emit_d8(cbuf, $primary ? 0x8 : 0x4);
4044
4045 // CALL directly to the runtime
4046 cbuf.set_insts_mark();
4047 emit_opcode(cbuf,0xE8); // Call into runtime
4048 emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
4049
4050 // Carry on here...
4051 %}
4052
// Convert an XMM float to an x87 double: MOVSS to a 4-byte stack slot,
// FLD_S it (pushing the widened value onto the x87 stack), free the slot.
4053 enc_class X2D_encoding( regD dst, regX src ) %{
4054 // Allocate a word
4055 emit_opcode(cbuf,0x83); // SUB ESP,4
4056 emit_opcode(cbuf,0xEC);
4057 emit_d8(cbuf,0x04);
4058
4059 emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], xmm
4060 emit_opcode (cbuf, 0x0F );
4061 emit_opcode (cbuf, 0x11 );
4062 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
4063
4064 emit_opcode(cbuf,0xD9 ); // FLD_S [ESP]
4065 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
4066
4067 emit_opcode(cbuf,0x83); // ADD ESP,4
4068 emit_opcode(cbuf,0xC4);
4069 emit_d8(cbuf,0x04);
4070
4071 // Carry on here...
4072 %}
4073
  // Float absolute value in XMM: AND with the 0x7FFFFFFF... mask from the
  // statically initialized, 16-byte-aligned float_signmask_pool (declared
  // near the top of this file) to clear the sign bit.
  enc_class AbsXF_encoding(regX dst) %{
    address signmask_address=(address)float_signmask_pool;
    // ANDPS $dst,[signconst]   (0F 54 /r)
    emit_opcode(cbuf, 0x0F);
    emit_opcode(cbuf, 0x54);
    emit_rm(cbuf, 0x0, $dst$$reg, 0x5);   // mod=00 rm=101 -> absolute [disp32]
    emit_d32(cbuf, (int)signmask_address);
  %}

  // Double absolute value in XMM: AND with the 0x7FFFFFFFFFFFFFFF mask pair.
  enc_class AbsXD_encoding(regXD dst) %{
    address signmask_address=(address)double_signmask_pool;
    // ANDPD $dst,[signconst]   (66 0F 54 /r)
    emit_opcode(cbuf, 0x66);
    emit_opcode(cbuf, 0x0F);
    emit_opcode(cbuf, 0x54);
    emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
    emit_d32(cbuf, (int)signmask_address);
  %}

  // Float negate in XMM: XOR with 0x80000000... to flip the sign bit.
  enc_class NegXF_encoding(regX dst) %{
    address signmask_address=(address)float_signflip_pool;
    // XORPS $dst,[signconst]   (0F 57 /r)
    emit_opcode(cbuf, 0x0F);
    emit_opcode(cbuf, 0x57);
    emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
    emit_d32(cbuf, (int)signmask_address);
  %}

  // Double negate in XMM: XOR with the 0x8000000000000000 mask pair.
  enc_class NegXD_encoding(regXD dst) %{
    address signmask_address=(address)double_signflip_pool;
    // XORPD $dst,[signconst]   (66 0F 57 /r)
    emit_opcode(cbuf, 0x66);
    emit_opcode(cbuf, 0x0F);
    emit_opcode(cbuf, 0x57);
    emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
    emit_d32(cbuf, (int)signmask_address);
  %}
4111
  // Multiply the FPU top-of-stack by a stack register: FMUL ST,ST(i).
  enc_class FMul_ST_reg( eRegF src1 ) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FMUL ST,$src  /* D8 C8+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src1$$reg);
  %}

  // Add a stack register into the FPU top-of-stack WITHOUT popping:
  // D8 C0+i encodes FADD ST,ST(i) (the popping FADDP form is DE C0+i,
  // emitted by FAddP_reg_ST below).
  enc_class FAdd_ST_reg( eRegF src2 ) %{
    // FADD ST,src2  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
    //could use FADDP src2,fpST /* DE C0+i */
  %}

  // Add top-of-stack into a stack register and pop: FADDP ST(i),ST.
  enc_class FAddP_reg_ST( eRegF src2 ) %{
    // FADDP src2,ST /* DE C0+i */
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
  %}
4131
4159 emit_opcode(cbuf, 0xC0 + $src1$$reg);
4160
4161 // FMULP src2,ST /* DE C8+i */
4162 emit_opcode(cbuf, 0xDE);
4163 emit_opcode(cbuf, 0xC8 + $src2$$reg);
4164 %}
4165
  // Atomically load the volatile long
  // (pre-SSE2 path): a single 64-bit FILD is the only atomic 8-byte read
  // available, so load through the FPU and FISTP into stack slot $dst.
  enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
    emit_opcode(cbuf,0xDF);               // FILD_D $mem   (DF /5)
    int rm_byte_opcode = 0x05;
    int base = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
    store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );   // FISTP_D $dst  (DF /7)
  %}

  // Atomic volatile long load via XMM (SSE2 path): one 64-bit XMM load
  // from $mem, then one 64-bit XMM store into stack slot $dst.
  enc_class enc_loadLX_volatile( memory mem, stackSlotL dst, regXD tmp ) %{
    { // Atomic long load
      // UseXmmLoadAndClearUpper ? movsd $tmp,$mem : movlpd $tmp,$mem
      emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
      int base = $mem$$base;
      int index = $mem$$index;
      int scale = $mem$$scale;
      int displace = $mem$$disp;
      bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
      encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
    }
    { // MOVSD $dst,$tmp ! atomic long store
      emit_opcode(cbuf,0xF2);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x11);
      int base = $dst$$base;
      int index = $dst$$index;
      int scale = $dst$$scale;
      int displace = $dst$$disp;
      bool disp_is_oop = $dst->disp_is_oop(); // disp-as-oop when working with static globals
      encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
    }
  %}

  // Atomic volatile long load into an integer register pair (SSE2 path):
  // 64-bit XMM load, then split $tmp into $dst.lo / $dst.hi with
  // MOVD + PSRLQ + MOVD.
  enc_class enc_loadLX_reg_volatile( memory mem, eRegL dst, regXD tmp ) %{
    { // Atomic long load
      // UseXmmLoadAndClearUpper ? movsd $tmp,$mem : movlpd $tmp,$mem
      emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
      int base = $mem$$base;
      int index = $mem$$index;
      int scale = $mem$$scale;
      int displace = $mem$$disp;
      bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
      encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
    }
    { // MOVD $dst.lo,$tmp   (66 0F 7E /r — low 32 bits to GPR)
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x7E);
      emit_rm(cbuf, 0x3, $tmp$$reg, $dst$$reg);
    }
    { // PSRLQ $tmp,32       (66 0F 73 /2 imm8 — shift high half down)
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x73);
      emit_rm(cbuf, 0x3, 0x02, $tmp$$reg);
      emit_d8(cbuf, 0x20);
    }
    { // MOVD $dst.hi,$tmp
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x7E);
      emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg));
    }
  %}
4238
  // Volatile Store Long. Must be atomic, so move it into
  // the FP TOS and then do a 64-bit FIST. Has to probe the
  // target address before the store (for null-ptr checks)
  // so the memory operand is used twice in the encoding.
  enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
    store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );   // FILD_D $src  (DF /5)
    cbuf.set_insts_mark(); // Mark start of FIST in case $mem has an oop
    emit_opcode(cbuf,0xDF);               // FISTP_D $mem  (DF /7)
    int rm_byte_opcode = 0x07;
    int base = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
  %}

  // Atomic volatile long store via XMM (SSE2 path): load the 64-bit
  // source stack slot into $tmp, then store it to $mem with one MOVSD.
  // Only the MOVSD to $mem needs to be atomic.
  enc_class enc_storeLX_volatile( memory mem, stackSlotL src, regXD tmp) %{
    { // Atomic long load
      // UseXmmLoadAndClearUpper ? movsd $tmp,[$src] : movlpd $tmp,[$src]
      emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
      int base = $src$$base;
      int index = $src$$index;
      int scale = $src$$scale;
      int displace = $src$$disp;
      bool disp_is_oop = $src->disp_is_oop(); // disp-as-oop when working with static globals
      encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
    }
    cbuf.set_insts_mark(); // Mark start of MOVSD in case $mem has an oop
    { // MOVSD $mem,$tmp ! atomic long store
      emit_opcode(cbuf,0xF2);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x11);
      int base = $mem$$base;
      int index = $mem$$index;
      int scale = $mem$$scale;
      int displace = $mem$$disp;
      bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
      encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
    }
  %}

  // Atomic volatile long store from an integer register pair (SSE2 path):
  // pack $src.lo/$src.hi into $tmp with MOVD + MOVD + PUNPCKLDQ, then do
  // one atomic 64-bit MOVSD to $mem.
  enc_class enc_storeLX_reg_volatile( memory mem, eRegL src, regXD tmp, regXD tmp2) %{
    { // MOVD $tmp,$src.lo   (66 0F 6E /r — GPR to low 32 bits of XMM)
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x6E);
      emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
    }
    { // MOVD $tmp2,$src.hi
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x6E);
      emit_rm(cbuf, 0x3, $tmp2$$reg, HIGH_FROM_LOW($src$$reg));
    }
    { // PUNPCKLDQ $tmp,$tmp2  (66 0F 62 /r — interleave low dwords: hi:lo)
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x62);
      emit_rm(cbuf, 0x3, $tmp$$reg, $tmp2$$reg);
    }
    cbuf.set_insts_mark(); // Mark start of MOVSD in case $mem has an oop
    { // MOVSD $mem,$tmp ! atomic long store
      emit_opcode(cbuf,0xF2);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x11);
      int base = $mem$$base;
      int index = $mem$$index;
      int scale = $mem$$scale;
      int displace = $mem$$disp;
      bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
      encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
    }
  %}

  // Safepoint Poll. This polls the safepoint page, and causes an
  // exception if it is not readable. Unfortunately, it kills the condition code
  // in the process
  // We currently use TESTL [spp],EDI
  // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0

  enc_class Safepoint_Poll() %{
    cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
    emit_opcode(cbuf,0x85);               // TEST [polling_page],EDI  (85 /r)
    emit_rm (cbuf, 0x0, 0x7, 0x5);        // reg=EDI, mod=00 rm=101 -> [disp32]
    emit_d32(cbuf, (intptr_t)os::get_polling_page());
  %}
4328 %}
4329
4330
4331 //----------FRAME--------------------------------------------------------------
4332 // Definition of frame structure and management information.
4333 //
4334 // S T A C K L A Y O U T Allocators stack-slot number
4335 // | (to get allocators register number
6860 // then store it down to the stack and reload on the int
6861 // side.
// Atomic volatile long load, pre-SSE2: go through the FPU (FILD/FISTP)
// into a stack slot, the only atomic 64-bit path without SSE2.
instruct loadL_volatile(stackSlotL dst, memory mem) %{
  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(200);
  format %{ "FILD $mem\t# Atomic volatile long load\n\t"
            "FISTp $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

// Atomic volatile long load to a stack slot via an XMM temp (SSE2).
instruct loadLX_volatile(stackSlotL dst, memory mem, regXD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);          // tmp carries the 64-bit value between the two moves
  ins_cost(180);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD $dst,$tmp" %}
  ins_encode(enc_loadLX_volatile(mem, dst, tmp));
  ins_pipe( pipe_slow );
%}

// Atomic volatile long load directly to an integer register pair (SSE2);
// cheapest of the three variants (cost 160).
instruct loadLX_reg_volatile(eRegL dst, memory mem, regXD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD $dst.lo,$tmp\n\t"
            "PSRLQ $tmp,32\n\t"
            "MOVD $dst.hi,$tmp" %}
  ins_encode(enc_loadLX_reg_volatile(mem, dst, tmp));
  ins_pipe( pipe_slow );
%}

// Load Range
// (array length field; plain 32-bit MOV)
instruct loadRange(eRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);              // MOV r32,r/m32
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}
6907
6908
6909 // Load Pointer
6910 instruct loadP(eRegP dst, memory mem) %{
6911 match(Set dst (LoadP mem));
6912
6913 ins_cost(125);
// Load Double
// (FPU path, UseSSE<=1): FLD_D from memory then FSTP into the
// allocated FP register.
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xDD); /* DD /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_D(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Double to XMM
// MOVSD (F2 0F 10) also clears the upper half — preferred when
// UseXmmLoadAndClearUpper says that is cheap on this CPU.
instruct loadXD(regXD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD $dst,$mem" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x10), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}

// MOVLPD (66 0F 12) variant: writes only the low 64 bits, leaving the
// upper half of $dst unchanged.
instruct loadXD_partial(regXD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x12), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadX(regX dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS $dst,$mem" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}

// Load Float
// (FPU path, UseSSE==0): FLD_S then FSTP into the allocated FP register.
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_F(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Aligned Packed Byte to XMM register
instruct loadA8B(regXD dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (Load8B mem));
  ins_cost(125);
  format %{ "MOVQ $dst,$mem\t! packed8B" %}
  ins_encode( movq_ld(dst, mem));
  ins_pipe( pipe_slow );
%}

// Load Aligned Packed Short to XMM register
instruct loadA4S(regXD dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (Load4S mem));
  ins_cost(125);
  format %{ "MOVQ $dst,$mem\t! packed4S" %}
  ins_encode( movq_ld(dst, mem));
  ins_pipe( pipe_slow );
%}

// Load Aligned Packed Char to XMM register
instruct loadA4C(regXD dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (Load4C mem));
  ins_cost(125);
  format %{ "MOVQ $dst,$mem\t! packed4C" %}
  ins_encode( movq_ld(dst, mem));
  ins_pipe( pipe_slow );
%}

// Load Aligned Packed Integer to XMM register
instruct load2IU(regXD dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (Load2I mem));
  ins_cost(125);
  format %{ "MOVQ $dst,$mem\t! packed2I" %}
  ins_encode( movq_ld(dst, mem));
  ins_pipe( pipe_slow );
%}

// Load Aligned Packed Single to XMM
instruct loadA2F(regXD dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (Load2F mem));
  ins_cost(145);
  format %{ "MOVQ $dst,$mem\t! packed2F" %}
  ins_encode( movq_ld(dst, mem));
  ins_pipe( pipe_slow );
%}
7038
// Load Effective Address
// (base + 8-bit offset form; address arithmetic without touching flags)
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);              // LEA r32,m
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}
7049
7050 instruct leaP32(eRegP dst, indOffset32 mem) %{
7051 match(Set dst mem);
7052
7053 ins_cost(110);
7054 format %{ "LEA $dst,$mem" %}
7055 opcode(0x8D);
7241 %}
7242 ins_pipe(fpu_reg_con);
7243 %}
7244
// The instruction usage is guarded by predicate in operand immXD().
// Load a double constant into XMM from the per-method constant table.
instruct loadConXD(regXD dst, immXD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immXD0().
// Materialize +0.0 without a memory load: XORPD reg with itself.
instruct loadConXD0(regXD dst, immXD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD $dst,$dst\t# double 0.0" %}
  ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x57), RegReg(dst,dst));
  ins_pipe( pipe_slow );
%}

// Load Stack Slot
// (spill reload: 32-bit MOV from a stack slot into an int register)
instruct loadSSI(eRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);              // MOV r32,r/m32
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}
7275
7276 instruct loadSSL(eRegL dst, stackSlotL src) %{
7277 match(Set dst src);
7278
7279 ins_cost(200);
7280 format %{ "MOV $dst,$src.lo\n\t"
7281 "MOV $dst+4,$src.hi" %}
7543 predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
7544 match(Set mem (StoreL mem src));
7545 effect( KILL cr );
7546 ins_cost(400);
7547 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
7548 "FILD $src\n\t"
7549 "FISTp $mem\t # 64-bit atomic volatile long store" %}
7550 opcode(0x3B);
7551 ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
7552 ins_pipe( fpu_reg_mem );
7553 %}
7554
// Atomic volatile long store from a stack slot via XMM (SSE2).  The
// leading CMP EAX,$mem (opcode 3B) only probes the address so a null
// pointer faults before the store; it kills the flags, hence KILL cr.
instruct storeLX_volatile(memory mem, stackSlotL src, regXD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD $tmp,$src\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);              // CMP r32,r/m32 — address probe only
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeLX_volatile(mem, src, tmp));
  ins_pipe( pipe_slow );
%}

// Atomic volatile long store from an integer register pair (SSE2):
// pack lo/hi into $tmp via $tmp2, then one atomic MOVSD to memory.
instruct storeLX_reg_volatile(memory mem, eRegL src, regXD tmp2, regXD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD $tmp,$src.lo\n\t"
            "MOVD $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);              // CMP r32,r/m32 — address probe only
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeLX_reg_volatile(mem, src, tmp, tmp2));
  ins_pipe( pipe_slow );
%}

// Store Pointer; for storing unknown oops and raw pointers
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);              // MOV r/m32,r32
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}
7593
7594 // Store Integer Immediate
7595 instruct storeImmI(memory mem, immI src) %{
7596 match(Set mem (StoreI mem src));
7597
7598 ins_cost(150);
7599 format %{ "MOV $mem,$src" %}
7626 ins_pipe( ialu_mem_imm );
7627 %}
7628
// Store Byte Immediate
// C6 /0 ib: MOV m8,imm8.
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src" %}
  opcode(0xC6); /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Aligned Packed Byte XMM register to memory
instruct storeA8B(memory mem, regXD src) %{
  predicate(UseSSE>=1);
  match(Set mem (Store8B mem src));
  ins_cost(145);
  format %{ "MOVQ $mem,$src\t! packed8B" %}
  ins_encode( movq_st(mem, src));
  ins_pipe( pipe_slow );
%}

// Store Aligned Packed Char/Short XMM register to memory
instruct storeA4C(memory mem, regXD src) %{
  predicate(UseSSE>=1);
  match(Set mem (Store4C mem src));
  ins_cost(145);
  format %{ "MOVQ $mem,$src\t! packed4C" %}
  ins_encode( movq_st(mem, src));
  ins_pipe( pipe_slow );
%}

// Store Aligned Packed Integer XMM register to memory
instruct storeA2I(memory mem, regXD src) %{
  predicate(UseSSE>=1);
  match(Set mem (Store2I mem src));
  ins_cost(145);
  format %{ "MOVQ $mem,$src\t! packed2I" %}
  ins_encode( movq_st(mem, src));
  ins_pipe( pipe_slow );
%}

// Store CMS card-mark Immediate
// (same encoding as storeImmB; kept separate so the matcher can treat
// card-mark stores specially)
instruct storeImmCM(memory mem, immI8 src) %{
  match(Set mem (StoreCM mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
  opcode(0xC6); /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}
7680
7681 // Store Double
7682 instruct storeD( memory mem, regDPR1 src) %{
7683 predicate(UseSSE<=1);
7684 match(Set mem (StoreD mem src));
7685
7686 ins_cost(100);
7692
// Store double does rounding on x86
// (FST_D, DD /2, stores without popping; the RoundDouble node is
// absorbed because the 64-bit store itself rounds the 80-bit ST value)
instruct storeD_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D $mem,$src\t# round" %}
  opcode(0xDD); /* DD /2 */
  ins_encode( enc_FP_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM register to memory (double-precision floating points)
// MOVSD instruction
instruct storeXD(memory mem, regXD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD $mem,$src" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x11), RegMem(src, mem));
  ins_pipe( pipe_slow );
%}

// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
instruct storeX(memory mem, regX src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS $mem,$src" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x11), RegMem(src, mem));
  ins_pipe( pipe_slow );
%}

// Store Aligned Packed Single Float XMM register to memory
instruct storeA2F(memory mem, regXD src) %{
  predicate(UseSSE>=1);
  match(Set mem (Store2F mem src));
  ins_cost(145);
  format %{ "MOVQ $mem,$src\t! packed2F" %}
  ins_encode( movq_st(mem, src));
  ins_pipe( pipe_slow );
%}

// Store Float
// (FPU path, UseSSE==0; FST_S is D9 /2)
instruct storeF( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S $mem,$src" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FP_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}
7748
7749 // Store Float does rounding on x86
7750 instruct storeF_rounded( memory mem, regFPR1 src) %{
7751 predicate(UseSSE==0);
7752 match(Set mem (StoreF mem (RoundFloat src)));
7753
8423 match(Set dst (CastII dst));
8424 format %{ "#castII of $dst" %}
8425 ins_encode( /*empty encoding*/ );
8426 ins_cost(0);
8427 ins_pipe( empty );
8428 %}
8429
8430
// Load-locked - same as a regular pointer load when used with compare-swap
instruct loadPLocked(eRegP dst, memory mem) %{
  match(Set dst (LoadPLocked mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# Load ptr. locked" %}
  opcode(0x8B);              // MOV r32,r/m32
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// LoadLong-locked - same as a volatile long load when used with compare-swap
// (pre-SSE2: atomic 64-bit FILD/FISTP through a stack slot)
instruct loadLLocked(stackSlotL dst, load_long_memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadLLocked mem));

  ins_cost(200);
  format %{ "FILD $mem\t# Atomic volatile long load\n\t"
            "FISTp $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

// SSE2 variant: atomic 64-bit XMM load to a stack slot.
instruct loadLX_Locked(stackSlotL dst, load_long_memory mem, regXD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (LoadLLocked mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD $dst,$tmp" %}
  ins_encode(enc_loadLX_volatile(mem, dst, tmp));
  ins_pipe( pipe_slow );
%}

// SSE2 variant: atomic 64-bit XMM load split into an integer register pair.
instruct loadLX_reg_Locked(eRegL dst, load_long_memory mem, regXD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (LoadLLocked mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD $dst.lo,$tmp\n\t"
            "PSRLQ $tmp,32\n\t"
            "MOVD $dst.hi,$tmp" %}
  ins_encode(enc_loadLX_reg_volatile(mem, dst, tmp));
  ins_pipe( pipe_slow );
%}

// Conditional-store of the updated heap-top.
// Used during allocation of the shared heap.
// Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel.
instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
  // EAX is killed if there is contention, but then it's also unused.
  // In the common case of no contention, EAX holds the new oop address.
  format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
  ins_pipe( pipe_cmpxchg );
%}
8489
8490 // Conditional-store of an int value.
8491 // ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel.
8492 instruct storeIConditional( memory mem, eAXRegI oldval, eRegI newval, eFlagsReg cr ) %{
8493 match(Set cr (StoreIConditional mem (Binary oldval newval)));
8494 effect(KILL oldval);
10116 OpcS, OpcP, PopFPU,
10117 CmpF_Result(dst));
10118 ins_pipe( pipe_slow );
10119 %}
10120
// Compare into -1,0,1
// (FPU path: FCOMP, D8 D8+i / D8 /3, then FNSTSW/SAHF-style fixup in
// CmpF_Result; EAX is clobbered by the status-word shuffle)
instruct cmpD_reg(eRegI dst, regD src1, regD src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPD $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
// COMISD (66 0F 2F) leaves unordered as PF=1; the cmpF_P6_fixup turns
// that into a carry so ordinary unsigned branches work.  Kills EAX.
instruct cmpXD_cc(eFlagsRegU cr, regXD dst, regXD src, eAXRegI rax) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD dst src));
  effect(KILL rax);
  ins_cost(125);
  format %{ "COMISD $dst,$src\n"
          "\tJNP exit\n"
          "\tMOV ah,1 // saw a NaN, set CF\n"
          "\tSAHF\n"
          "exit:\tNOP // avoid branch to branch" %}
  opcode(0x66, 0x0F, 0x2F);
  ins_encode(OpcP, OpcS, Opcode(tertiary), RegReg(dst, src), cmpF_P6_fixup);
  ins_pipe( pipe_slow );
%}

// Raw COMISD with no NaN fixup — only for flag users that tolerate the
// unordered encoding (eFlagsRegUCF).
instruct cmpXD_ccCF(eFlagsRegUCF cr, regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD dst src));
  ins_cost(100);
  format %{ "COMISD $dst,$src" %}
  opcode(0x66, 0x0F, 0x2F);
  ins_encode(OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
// (memory operand variant of cmpXD_cc)
instruct cmpXD_ccmem(eFlagsRegU cr, regXD dst, memory src, eAXRegI rax) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD dst (LoadD src)));
  effect(KILL rax);
  ins_cost(145);
  format %{ "COMISD $dst,$src\n"
          "\tJNP exit\n"
          "\tMOV ah,1 // saw a NaN, set CF\n"
          "\tSAHF\n"
          "exit:\tNOP // avoid branch to branch" %}
  opcode(0x66, 0x0F, 0x2F);
  ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(dst, src), cmpF_P6_fixup);
  ins_pipe( pipe_slow );
%}

// Memory-operand COMISD without NaN fixup (see cmpXD_ccCF).
instruct cmpXD_ccmemCF(eFlagsRegUCF cr, regXD dst, memory src) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD dst (LoadD src)));
  ins_cost(100);
  format %{ "COMISD $dst,$src" %}
  opcode(0x66, 0x0F, 0x2F);
  ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(dst, src));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM
// NaN (PF set) is mapped to -1 by jumping to the DEC path.
instruct cmpXD_reg(eRegI dst, regXD src1, regXD src2, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "XOR $dst,$dst\n"
          "\tCOMISD $src1,$src2\n"
          "\tJP,s nan\n"
          "\tJEQ,s exit\n"
          "\tJA,s inc\n"
        "nan:\tDEC $dst\n"
          "\tJMP,s exit\n"
        "inc:\tINC $dst\n"
        "exit:"
              %}
  opcode(0x66, 0x0F, 0x2F);
  ins_encode(Xor_Reg(dst), OpcP, OpcS, Opcode(tertiary), RegReg(src1, src2),
             CmpX_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM and memory
// MOV $dst,0 is used instead of XOR so the COMISD flags survive.
instruct cmpXD_regmem(eRegI dst, regXD src1, memory mem, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (CmpD3 src1 (LoadD mem)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "COMISD $src1,$mem\n"
          "\tMOV $dst,0\t\t# do not blow flags\n"
          "\tJP,s nan\n"
          "\tJEQ,s exit\n"
          "\tJA,s inc\n"
        "nan:\tDEC $dst\n"
          "\tJMP,s exit\n"
        "inc:\tINC $dst\n"
        "exit:"
              %}
  opcode(0x66, 0x0F, 0x2F);
  ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(src1, mem),
             LdImmI(dst,0x0), CmpX_Result(dst));
  ins_pipe( pipe_slow );
%}
10230
10231
// Double subtract on the FPU stack: FLD src, then FSUBP into dst
// (DE E8+i; /5 is the FSUBP reg form).
instruct subD_reg(regD dst, regD src) %{
  predicate (UseSSE <=1);
  match(Set dst (SubD dst src));

  format %{ "FLD $src\n\t"
            "DSUBp $dst,ST" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_cost(150);
  ins_encode( Push_Reg_D(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
10244
10245 instruct subD_reg_round(stackSlotD dst, regD src1, regD src2) %{
10246 predicate (UseSSE <=1);
10247 match(Set dst (RoundDouble (SubD src1 src2)));
10266 "DSUBp $dst,ST" %}
10267 opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD DD /0 */
10268 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10269 OpcP, RegOpc(dst) );
10270 ins_pipe( fpu_reg_mem );
10271 %}
10272
// Double absolute value on the FPU: FABS (D9 E1) operates on ST in place,
// so src and dst are both pinned to FPR1.
instruct absD_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst (AbsD src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);        // emitted as D9 E1 (OpcS first, then OpcP)
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Double absolute value in XMM: clear the sign bit with ANDPD against
// the static double_signmask_pool (see AbsXD_encoding).
instruct absXD_reg( regXD dst ) %{
  predicate(UseSSE>=2);
  match(Set dst (AbsD dst));
  format %{ "ANDPD $dst,[0x7FFFFFFFFFFFFFFF]\t# ABS D by sign masking" %}
  ins_encode( AbsXD_encoding(dst));
  ins_pipe( pipe_slow );
%}

// Double negate on the FPU: FCHS (D9 E0) flips the sign of ST in place.
instruct negD_reg(regDPR1 dst, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set dst (NegD src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);        // emitted as D9 E0
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Double negate in XMM: XORPD with the sign-flip constant pool.
instruct negXD_reg( regXD dst ) %{
  predicate(UseSSE>=2);
  match(Set dst (NegD dst));
  format %{ "XORPD $dst,[0x8000000000000000]\t# CHS D by sign flipping" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister,
             ExternalAddress((address)double_signflip_pool));
  %}
  ins_pipe( pipe_slow );
%}

// Double add on the FPU stack: FLD src then FADDP into dst (DE C0+i).
// size(4): 2 bytes for the FLD + 2 for the FADDP.
instruct addD_reg(regD dst, regD src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst src));
  format %{ "FLD $src\n\t"
            "DADD $dst,ST" %}
  size(4);
  ins_cost(150);
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_D(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Double add of a non-trivial constant with an explicit store-round:
// FLD the constant from the constant table, FADD the register, FSTP to
// the stack slot (the 64-bit store performs the rounding).  The predicate
// excludes 0.0 and 1.0, which other rules handle more cheaply.
instruct addD_reg_imm_round(stackSlotD dst, regD src, immD con) %{
  predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (RoundDouble (AddD src con)));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DADD ST,$src\n\t"
            "FSTP_D $dst\t# D-round" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fadd($src$$reg);
    __ fstp_d(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
10411
// Add two double precision floating point values in xmm
// ADDSD xmm,xmm encoding is F2 0F 58 /r, emitted directly below.
instruct addXD_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (AddD dst src));
  format %{ "ADDSD $dst,$src" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// Add a double constant, loaded from the constant table, in xmm.
instruct addXD_imm(regXD dst, immXD con) %{
  predicate(UseSSE>=2);
  match(Set dst (AddD dst con));
  format %{ "ADDSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ addsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Add a double loaded straight from memory in xmm.
instruct addXD_mem(regXD dst, memory mem) %{
  predicate(UseSSE>=2);
  match(Set dst (AddD dst (LoadD mem)));
  format %{ "ADDSD $dst,$mem" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}
10438
// Sub two double precision floating point values in xmm
// SUBSD xmm,xmm encoding is F2 0F 5C /r, emitted directly below.
instruct subXD_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (SubD dst src));
  format %{ "SUBSD $dst,$src" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// Subtract a double constant, loaded from the constant table, in xmm.
instruct subXD_imm(regXD dst, immXD con) %{
  predicate(UseSSE>=2);
  match(Set dst (SubD dst con));
  format %{ "SUBSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ subsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Subtract a double loaded straight from memory in xmm.
instruct subXD_mem(regXD dst, memory mem) %{
  predicate(UseSSE>=2);
  match(Set dst (SubD dst (LoadD mem)));
  format %{ "SUBSD $dst,$mem" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}
10465
// Mul two double precision floating point values in xmm
// MULSD xmm,xmm encoding is F2 0F 59 /r, emitted directly below.
instruct mulXD_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (MulD dst src));
  format %{ "MULSD $dst,$src" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// Multiply by a double constant, loaded from the constant table, in xmm.
instruct mulXD_imm(regXD dst, immXD con) %{
  predicate(UseSSE>=2);
  match(Set dst (MulD dst con));
  format %{ "MULSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Multiply by a double loaded straight from memory in xmm.
instruct mulXD_mem(regXD dst, memory mem) %{
  predicate(UseSSE>=2);
  match(Set dst (MulD dst (LoadD mem)));
  format %{ "MULSD $dst,$mem" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}
10492
// Div two double precision floating point values in xmm
// DIVSD xmm,xmm encoding is F2 0F 5E /r.  The bytes are emitted directly by
// the Opcode() primitives in ins_encode; the redundant opcode(0xF2,0x0F,0x5E)
// declaration (never consumed — no OpcP/OpcS in the encoding, and absent from
// the sibling addXD/subXD/mulXD rules) has been removed for consistency.
instruct divXD_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (DivD dst src));
  format %{ "DIVSD $dst,$src" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// Divide by a double constant, loaded from the constant table, in xmm.
instruct divXD_imm(regXD dst, immXD con) %{
  predicate(UseSSE>=2);
  match(Set dst (DivD dst con));
  format %{ "DIVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ divsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Divide by a double loaded straight from memory in xmm.
instruct divXD_mem(regXD dst, memory mem) %{
  predicate(UseSSE>=2);
  match(Set dst (DivD dst (LoadD mem)));
  format %{ "DIVSD $dst,$mem" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}
10520
10521
10522 instruct mulD_reg(regD dst, regD src) %{
10523 predicate(UseSSE<=1);
10524 match(Set dst (MulD dst src));
10525 format %{ "FLD $src\n\t"
10526 "DMULp $dst,ST" %}
10527 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
10528 ins_cost(150);
10529 ins_encode( Push_Reg_D(src),
10530 OpcP, RegOpc(dst) );
10531 ins_pipe( fpu_reg_reg );
10532 %}
10533
10534 // Strict FP instruction biases argument before multiply then
10535 // biases result to avoid double rounding of subnormals.
10536 //
10537 // scale arg1 by multiplying arg1 by 2^(-15360)
11129 OpcS, OpcP, PopFPU,
11130 CmpF_Result(dst));
11131 ins_pipe( pipe_slow );
11132 %}
11133
// Compare into -1,0,1
// x87-only (UseSSE==0) float three-way compare producing -1/0/1 in $dst.
// CmpF_Result materializes the result from the FPU status word via EAX,
// hence the KILL of rax and the condition codes.
// NOTE(review): Push_Reg_D is reused here for single-precision stack
// registers — presumably register pushes are width-agnostic on the x87
// stack; confirm against the encoding definition.
instruct cmpF_reg(eRegI dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPF $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}
11147
// float compare and set condition codes in EFLAGS by XMM regs
// COMISS leaves PF set on an unordered (NaN) compare; the cmpF_P6_fixup
// sequence shown in the format forces CF via AH/SAHF so NaN behaves as
// "less than" for the unsigned-flags users.  KILLs rax for that fixup.
instruct cmpX_cc(eFlagsRegU cr, regX dst, regX src, eAXRegI rax) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF dst src));
  effect(KILL rax);
  ins_cost(145);
  format %{ "COMISS $dst,$src\n"
            "\tJNP exit\n"
            "\tMOV ah,1 // saw a NaN, set CF\n"
            "\tSAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0x0F, 0x2F);
  ins_encode(OpcP, OpcS, RegReg(dst, src), cmpF_P6_fixup);
  ins_pipe( pipe_slow );
%}

// Cheaper variant for consumers that only read carry-ish flags
// (eFlagsRegUCF): no NaN fixup is required, so no rax KILL.
instruct cmpX_ccCF(eFlagsRegUCF cr, regX dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF dst src));
  ins_cost(100);
  format %{ "COMISS $dst,$src" %}
  opcode(0x0F, 0x2F);
  ins_encode(OpcP, OpcS, RegReg(dst, src));
  ins_pipe( pipe_slow );
%}
11173
// float compare and set condition codes in EFLAGS by XMM regs
// Memory-operand form of cmpX_cc: COMISS against a loaded float, with the
// same NaN fixup through AH/SAHF (hence the rax KILL).
instruct cmpX_ccmem(eFlagsRegU cr, regX dst, memory src, eAXRegI rax) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF dst (LoadF src)));
  effect(KILL rax);
  ins_cost(165);
  format %{ "COMISS $dst,$src\n"
            "\tJNP exit\n"
            "\tMOV ah,1 // saw a NaN, set CF\n"
            "\tSAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0x0F, 0x2F);
  ins_encode(OpcP, OpcS, RegMem(dst, src), cmpF_P6_fixup);
  ins_pipe( pipe_slow );
%}

// Memory-operand form without the NaN fixup, for eFlagsRegUCF consumers.
instruct cmpX_ccmemCF(eFlagsRegUCF cr, regX dst, memory src) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF dst (LoadF src)));
  ins_cost(100);
  format %{ "COMISS $dst,$src" %}
  opcode(0x0F, 0x2F);
  ins_encode(OpcP, OpcS, RegMem(dst, src));
  ins_pipe( pipe_slow );
%}
11199
// Compare into -1,0,1 in XMM
// Three-way float compare: zero $dst first, then COMISS and branch to
// produce -1 (less or NaN), 0 (equal) or 1 (greater).  NaN takes the
// "nan:" path (DEC), matching Java's CmpF3 semantics of NaN => -1.
instruct cmpX_reg(eRegI dst, regX src1, regX src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "XOR $dst,$dst\n"
            "\tCOMISS $src1,$src2\n"
            "\tJP,s nan\n"
            "\tJEQ,s exit\n"
            "\tJA,s inc\n"
            "nan:\tDEC $dst\n"
            "\tJMP,s exit\n"
            "inc:\tINC $dst\n"
            "exit:"
  %}
  opcode(0x0F, 0x2F);
  ins_encode(Xor_Reg(dst), OpcP, OpcS, RegReg(src1, src2), CmpX_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM and memory
// Same as cmpX_reg but the right operand comes from memory.  $dst is
// cleared with a flag-preserving MOV 0 *after* the compare (see format),
// since XOR would clobber the COMISS result.
instruct cmpX_regmem(eRegI dst, regX src1, memory mem, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 (LoadF mem)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "COMISS $src1,$mem\n"
            "\tMOV $dst,0\t\t# do not blow flags\n"
            "\tJP,s nan\n"
            "\tJEQ,s exit\n"
            "\tJA,s inc\n"
            "nan:\tDEC $dst\n"
            "\tJMP,s exit\n"
            "inc:\tINC $dst\n"
            "exit:"
  %}
  opcode(0x0F, 0x2F);
  ins_encode(OpcP, OpcS, RegMem(src1, mem), LdImmI(dst,0x0), CmpX_Result(dst));
  ins_pipe( pipe_slow );
%}
11241
// Spill to obtain 24-bit precision
// x87 float subtract that stores the result to a stack slot so the
// memory store rounds it to 24-bit (single) precision.
instruct subF24_reg(stackSlotF dst, regF src1, regF src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (SubF src1 src2));

  format %{ "FSUB $dst,$src1 - $src2" %}
  opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
  ins_encode( Push_Reg_F(src1),
              OpcReg_F(src2),
              Pop_Mem_F(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
11254 //
11255 // This instruction does not round to 24-bits
11256 instruct subF_reg(regF dst, regF src) %{
11257 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11258 match(Set dst (SubF dst src));
11278 %}
//
// This instruction does not round to 24-bits
// x87 float add, register-to-register; used only when 24-bit rounding of
// intermediates is not requested.
instruct addF_reg(regF dst, regF src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst src));

  format %{ "FLD $src\n\t"
            "FADDp $dst,ST" %}
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_F(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
11292
// Add two single precision floating point values in xmm
// ADDSS xmm,xmm encoding is F3 0F 58 /r, emitted directly below.
instruct addX_reg(regX dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set dst (AddF dst src));
  format %{ "ADDSS $dst,$src" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// Add a float constant, loaded from the constant table, in xmm.
instruct addX_imm(regX dst, immXF con) %{
  predicate(UseSSE>=1);
  match(Set dst (AddF dst con));
  format %{ "ADDSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ addss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Add a float loaded straight from memory in xmm.
instruct addX_mem(regX dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (AddF dst (LoadF mem)));
  format %{ "ADDSS $dst,$mem" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), RegMem(dst, mem));
  ins_pipe( pipe_slow );
%}

// Subtract two single precision floating point values in xmm
// SUBSS xmm,xmm encoding is F3 0F 5C /r.
instruct subX_reg(regX dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set dst (SubF dst src));
  format %{ "SUBSS $dst,$src" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// Subtract a float constant, loaded from the constant table, in xmm.
instruct subX_imm(regX dst, immXF con) %{
  predicate(UseSSE>=1);
  match(Set dst (SubF dst con));
  format %{ "SUBSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ subss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Subtract a float loaded straight from memory in xmm.
instruct subX_mem(regX dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (SubF dst (LoadF mem)));
  format %{ "SUBSS $dst,$mem" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}
11346
// Multiply two single precision floating point values in xmm
// MULSS xmm,xmm encoding is F3 0F 59 /r, emitted directly below.
instruct mulX_reg(regX dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set dst (MulF dst src));
  format %{ "MULSS $dst,$src" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// Multiply by a float constant, loaded from the constant table, in xmm.
instruct mulX_imm(regX dst, immXF con) %{
  predicate(UseSSE>=1);
  match(Set dst (MulF dst con));
  format %{ "MULSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ mulss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Multiply by a float loaded straight from memory in xmm.
instruct mulX_mem(regX dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (MulF dst (LoadF mem)));
  format %{ "MULSS $dst,$mem" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}

// Divide two single precision floating point values in xmm
// DIVSS xmm,xmm encoding is F3 0F 5E /r.
instruct divX_reg(regX dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set dst (DivF dst src));
  format %{ "DIVSS $dst,$src" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// Divide by a float constant, loaded from the constant table, in xmm.
instruct divX_imm(regX dst, immXF con) %{
  predicate(UseSSE>=1);
  match(Set dst (DivF dst con));
  format %{ "DIVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ divss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Divide by a float loaded straight from memory in xmm.
instruct divX_mem(regX dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (DivF dst (LoadF mem)));
  format %{ "DIVSS $dst,$mem" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}
11400
// Get the square root of a single precision floating point values in xmm
// Matches the ideal pattern ConvD2F(SqrtD(ConvF2D x)): a single-precision
// SQRTSS (F3 0F 51) gives the same result as widen/sqrt/narrow.
instruct sqrtX_reg(regX dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
  format %{ "SQRTSS $dst,$src" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x51), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// Same pattern with the float operand loaded from memory.
instruct sqrtX_mem(regX dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF mem)))));
  format %{ "SQRTSS $dst,$mem" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x51), RegMem(dst, mem));
  ins_pipe( pipe_slow );
%}

// Get the square root of a double precision floating point values in xmm
// SQRTSD xmm,xmm encoding is F2 0F 51 /r.
instruct sqrtXD_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD src));
  format %{ "SQRTSD $dst,$src" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x51), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// Double square root with the operand loaded from memory.
instruct sqrtXD_mem(regXD dst, memory mem) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD (LoadD mem)));
  format %{ "SQRTSD $dst,$mem" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x51), RegMem(dst, mem));
  ins_pipe( pipe_slow );
%}
11434
// x87 float absolute value.  Operates in place on ST(0) (regFPR1);
// OpcS then OpcP emits D9 E1 = FABS.
instruct absF_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (AbsF src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// SSE float absolute value: ANDPS with the 0x7FFFFFFF sign mask clears
// the sign bit (mask supplied by AbsXF_encoding).
instruct absX_reg(regX dst ) %{
  predicate(UseSSE>=1);
  match(Set dst (AbsF dst));
  format %{ "ANDPS $dst,[0x7FFFFFFF]\t# ABS F by sign masking" %}
  ins_encode( AbsXF_encoding(dst));
  ins_pipe( pipe_slow );
%}

// x87 float negate.  Operates in place on ST(0); emits D9 E0 = FCHS.
instruct negF_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (NegF src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// SSE float negate: XORPS with the 0x80000000 sign-flip mask.
instruct negX_reg( regX dst ) %{
  predicate(UseSSE>=1);
  match(Set dst (NegF dst));
  format %{ "XORPS $dst,[0x80000000]\t# CHS F by sign flipping" %}
  ins_encode( NegXF_encoding(dst));
  ins_pipe( pipe_slow );
%}
11470
// Cisc-alternate to addF_reg
// Spill to obtain 24-bit precision
// x87 float add with one memory operand; the final store to the stack
// slot rounds the result to 24-bit (single) precision.
instruct addF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FLD $src2\n\t"
            "FADD ST,$src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_F(src1),
              Pop_Mem_F(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
11486 //
11487 // Cisc-alternate to addF_reg
11853
// Force rounding to 24-bit precision and 6-bit exponent
// x87-only D2F: expand into a round-to-memory store (roundFloat_mem_reg).
instruct convD2F_reg(stackSlotF dst, regD src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));
  format %{ "FST_S $dst,$src\t# F-round" %}
  expand %{
    roundFloat_mem_reg(dst,src);
  %}
%}

// Force rounding to 24-bit precision and 6-bit exponent
// UseSSE==1 D2F: the double lives on the x87 stack but the float result
// must land in an xmm register, so bounce it through the stack.
instruct convD2X_reg(regX dst, regD src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "FST_S [ESP],$src\t# F-round\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode( D2X_encoding(dst, src) );
  ins_pipe( pipe_slow );
%}

// Force rounding double precision to single precision
// Pure-SSE2 D2F: CVTSD2SS (F2 0F 5A) rounds in one instruction.
instruct convXD2X_reg(regX dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));
  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  opcode(0xF2, 0x0F, 0x5A);
  ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// x87-only F2D: widening on the FP stack is a register move/pop.
// NOTE(review): the format text says "FST_S" although this converts float
// to double — looks like it should read FST_D; display-only, confirm.
instruct convF2D_reg_reg(regD dst, regF src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2D src));
  format %{ "FST_S $dst,$src\t# D-round" %}
  ins_encode( Pop_Reg_Reg_D(dst, src));
  ins_pipe( fpu_reg_reg );
%}

// UseSSE==1 F2D into a stack slot: expand into a round-to-memory store.
instruct convF2D_reg(stackSlotD dst, regF src) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  format %{ "FST_D $dst,$src\t# D-round" %}
  expand %{
    roundDouble_mem_reg(dst,src);
  %}
%}

// UseSSE==1 F2D: the float is in xmm but the double result must end up
// on the x87 stack, so bounce the value through memory.
instruct convX2D_reg(regD dst, regX src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "MOVSS [ESP] $src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "FSTP $dst\t# D-round" %}
  ins_encode( X2D_encoding(dst, src), Pop_Reg_D(dst));
  ins_pipe( pipe_slow );
%}

// Pure-SSE2 F2D: CVTSS2SD (F3 0F 5A).
instruct convX2XD_reg(regXD dst, regX src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvF2D src));
  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  opcode(0xF3, 0x0F, 0x5A);
  ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}
11925
// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
// x87 path: switch to truncating rounding, store as int, restore the mode,
// then compare against 0x80000000 — the sentinel the FPU stores on overflow
// or NaN — and call the d2i_wrapper slow path when it is seen.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert double to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD_D $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_D(src), D2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
// SSE2 path: CVTTSD2SI (F2 0F 2C) truncates directly; 0x80000000 in the
// result signals overflow/NaN and routes to the d2i_wrapper slow path.
instruct convXD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regXD src, eFlagsReg cr ) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 8\n\t"
            "MOVSD [ESP], $src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP, 8\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  opcode(0x1); // double-precision conversion
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x2C), FX2I_encoding(src,dst));
  ins_pipe( pipe_slow );
%}
11964
11965 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
11966 predicate(UseSSE<=1);
11967 match(Set dst (ConvD2L src));
11968 effect( KILL cr );
11969 format %{ "FLD $src\t# Convert double to long\n\t"
11970 "FLDCW trunc mode\n\t"
11971 "SUB ESP,8\n\t"
11972 "FISTp [ESP + #0]\n\t"
11973 "FLDCW std/24-bit mode\n\t"
11974 "POP EAX\n\t"
11975 "POP EDX\n\t"
11976 "CMP EDX,0x80000000\n\t"
11977 "JNE,s fast\n\t"
11978 "TEST EAX,EAX\n\t"
11979 "JNE,s fast\n\t"
11980 "FLD $src\n\t"
11981 "CALL d2l_wrapper\n"
// XMM lacks a float/double->long conversion, so use the old FPU stack.
// Bounce the xmm double through memory onto the x87 stack, FIST with
// truncating rounding, then check EDX:EAX for the 0x80000000:0 sentinel
// (overflow/NaN) and call d2l_wrapper on that slow path.
instruct convXD2L_reg_reg( eADXRegL dst, regXD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert double to long\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,8\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode( XD2L_encoding(src) );
  ins_pipe( pipe_slow );
%}
12012
12013 // Convert a double to an int. Java semantics require we do complex
12014 // manglations in the corner cases. So we set the rounding mode to
12015 // 'zero', store the darned double down as an int, and reset the
12016 // rounding mode to 'nearest'. The hardware stores a flag value down
12017 // if we would overflow or converted a NAN; we check for this and
12018 // and go the slow path if needed.
12019 instruct convF2I_reg_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
12020 predicate(UseSSE==0);
12021 match(Set dst (ConvF2I src));
12022 effect( KILL tmp, KILL cr );
12023 format %{ "FLD $src\t# Convert float to int \n\t"
12024 "FLDCW trunc mode\n\t"
12025 "SUB ESP,4\n\t"
12026 "FISTp [ESP + #0]\n\t"
12027 "FLDCW std/24-bit mode\n\t"
12028 "POP EAX\n\t"
12029 "CMP EAX,0x80000000\n\t"
12033 "fast:" %}
12034 // D2I_encoding works for F2I
12035 ins_encode( Push_Reg_F(src), D2I_encoding(src) );
12036 ins_pipe( pipe_slow );
12037 %}
12038
// Convert a float in xmm to an int reg.
// CVTTSS2SI (F3 0F 2C) truncates directly; a 0x80000000 result signals
// overflow/NaN and routes to the d2i_wrapper slow path with the original
// value reloaded onto the x87 stack.
instruct convX2I_reg(eAXRegI dst, eDXRegI tmp, regX src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 4\n\t"
            "MOVSS [ESP], $src\n\t"
            "FLD [ESP]\n\t"
            "ADD ESP, 4\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  opcode(0x0); // single-precision conversion
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x2C), FX2I_encoding(src,dst));
  ins_pipe( pipe_slow );
%}
12057
12058 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
12059 predicate(UseSSE==0);
12060 match(Set dst (ConvF2L src));
12061 effect( KILL cr );
12062 format %{ "FLD $src\t# Convert float to long\n\t"
12063 "FLDCW trunc mode\n\t"
12064 "SUB ESP,8\n\t"
12065 "FISTp [ESP + #0]\n\t"
12066 "FLDCW std/24-bit mode\n\t"
12067 "POP EAX\n\t"
12068 "POP EDX\n\t"
12069 "CMP EDX,0x80000000\n\t"
12070 "JNE,s fast\n\t"
12071 "TEST EAX,EAX\n\t"
12072 "JNE,s fast\n\t"
12073 "FLD $src\n\t"
12074 "CALL d2l_wrapper\n"
12084 match(Set dst (ConvF2L src));
12085 effect( KILL cr );
12086 format %{ "SUB ESP,8\t# Convert float to long\n\t"
12087 "MOVSS [ESP],$src\n\t"
12088 "FLD_S [ESP]\n\t"
12089 "FLDCW trunc mode\n\t"
12090 "FISTp [ESP + #0]\n\t"
12091 "FLDCW std/24-bit mode\n\t"
12092 "POP EAX\n\t"
12093 "POP EDX\n\t"
12094 "CMP EDX,0x80000000\n\t"
12095 "JNE,s fast\n\t"
12096 "TEST EAX,EAX\n\t"
12097 "JNE,s fast\n\t"
12098 "SUB ESP,4\t# Convert float to long\n\t"
12099 "MOVSS [ESP],$src\n\t"
12100 "FLD_S [ESP]\n\t"
12101 "ADD ESP,4\n\t"
12102 "CALL d2l_wrapper\n"
12103 "fast:" %}
12104 ins_encode( X2L_encoding(src) );
12105 ins_pipe( pipe_slow );
12106 %}
12107
// x87 I2D: FILD the int from its stack slot, FSTP into the FP register.
instruct convI2D_reg(regD dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_D(dst));
  ins_pipe( fpu_reg_mem );
%}

// SSE2 I2D from a GP register: CVTSI2SD (F2 0F 2A); used when the
// xmm-based variant below is disabled by !UseXmmI2D.
instruct convI2XD_reg(regXD dst, eRegI src) %{
  predicate( UseSSE>=2 && !UseXmmI2D );
  match(Set dst (ConvI2D src));
  format %{ "CVTSI2SD $dst,$src" %}
  opcode(0xF2, 0x0F, 0x2A);
  ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// SSE2 I2D with the int loaded straight from memory.
instruct convI2XD_mem(regXD dst, memory mem) %{
  predicate( UseSSE>=2 );
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "CVTSI2SD $dst,$mem" %}
  opcode(0xF2, 0x0F, 0x2A);
  ins_encode( OpcP, OpcS, Opcode(tertiary), RegMem(dst, mem));
  ins_pipe( pipe_slow );
%}

// SSE2 I2D via MOVD + CVTDQ2PD, selected by UseXmmI2D (presumably faster
// than CVTSI2SD on some hardware — chosen by flag, not proven here).
instruct convXI2XD_reg(regXD dst, eRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2D );
  match(Set dst (ConvI2D src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PD $dst,$dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}
12149
12150 instruct convI2D_mem(regD dst, memory mem) %{
12151 predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
12152 match(Set dst (ConvI2D (LoadI mem)));
12208 ins_pipe( fpu_reg_mem );
12209 %}
12210
// This instruction does not round to 24-bits
// x87 I2F with the int loaded from memory: FILD then FSTP to the register.
instruct convI2F_mem(regF dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_F(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert an int to a float in xmm; no rounding step needed.
// CVTSI2SS (F3 0F 2A); used unless the MOVD+CVTDQ2PS variant is enabled.
instruct convI2X_reg(regX dst, eRegI src) %{
  predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}

  opcode(0xF3, 0x0F, 0x2A); /* F3 0F 2A /r */
  ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// SSE2 I2F via MOVD + CVTDQ2PS, selected by the UseXmmI2F flag.
instruct convXI2X_reg(regX dst, eRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}
12247
12248 instruct convI2L_reg( eRegL dst, eRegI src, eFlagsReg cr) %{
12249 match(Set dst (ConvI2L src));
12250 effect(KILL cr);
12334 "FSTP_S $dst\t# F-round" %}
12335 opcode(0xDF, 0x5); /* DF /5 */
12336 ins_encode(convert_long_double(src), Pop_Mem_F(dst));
12337 ins_pipe( pipe_slow );
12338 %}
12339
// Long-to-int truncation: just copy the low 32-bit half of the long pair.
instruct convL2I_reg( eRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}
12347
12348
// Raw float-bits to int when the float is already in a stack slot:
// a plain 32-bit integer load (MOV r32, m32).
instruct MoveF2I_stack_reg(eRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// x87 float register to int stack slot: store the raw single bits.
instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_F(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// xmm float to int stack slot: MOVSS store (F3 0F 11).
instruct MoveF2I_reg_stack_sse(stackSlotI dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x11), RegMem(src, dst));
  ins_pipe( pipe_slow );
%}

// xmm float straight to a GP register with MOVD (cheapest form, SSE2).
instruct MoveF2I_reg_reg_sse(eRegI dst, regX src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode( MovX2I_reg(dst, src));
  ins_pipe( pipe_slow );
%}
12390
// Raw int bits to a float stack slot: a plain 32-bit integer store.
instruct MoveI2F_reg_stack(stackSlotF dst, eRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}


// Int stack slot to x87 float register: FLD the raw bits, FSTP to $dst.
instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FLD_S $src\n\t"
            "FSTP $dst\t# MoveI2F_stack_reg" %}
  opcode(0xD9); /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_F(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Int stack slot to xmm: MOVSS load (F3 0F 10).
instruct MoveI2F_stack_reg_sse(regX dst, stackSlotI src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), RegMem(dst,src));
  ins_pipe( pipe_slow );
%}

// GP register straight to xmm with MOVD (cheapest form, SSE2).
instruct MoveI2F_reg_reg_sse(regX dst, eRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
  ins_encode( MovI2X_reg(dst, src) );
  ins_pipe( pipe_slow );
%}
12438
// Raw double bits (in a stack slot) to a long register pair:
// two 32-bit integer loads for the lo and hi halves.
instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
  ins_pipe( ialu_mem_long_reg );
%}

// x87 double register to a long stack slot: store the raw 64 bits.
instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
  ins_encode( Pop_Mem_Reg_D(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// xmm double to a long stack slot: MOVSD store (F2 0F 11).
instruct MoveD2L_reg_stack_sse(stackSlotL dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(95);

  format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x11), RegMem(src,dst));
  ins_pipe( pipe_slow );
%}

// xmm double straight to a long register pair: MOVD the low half, swap
// halves into $tmp with PSHUFLW, MOVD the high half.  TEMP tmp because
// the shuffle scratches it.
instruct MoveD2L_reg_reg_sse(eRegL dst, regXD src, regXD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst.lo,$src\n\t"
            "PSHUFLW $tmp,$src,0x4E\n\t"
            "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
  ins_encode( MovXD2L_reg(dst, src, tmp) );
  ins_pipe( pipe_slow );
%}
12484
12485 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
12486 match(Set dst (MoveL2D src));
12487 effect(DEF dst, USE src);
12488
12489 ins_cost(200);
12490 format %{ "MOV $dst,$src.lo\n\t"
12491 "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
12492 opcode(0x89, 0x89);
12493 ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
12494 ins_pipe( ialu_mem_long_reg );
12495 %}
12496
12497
12498 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
12499 predicate(UseSSE<=1);
12500 match(Set dst (MoveL2D src));
12501 effect(DEF dst, USE src);
12502 ins_cost(125);
12503
12504 format %{ "FLD_D $src\n\t"
12505 "FSTP $dst\t# MoveL2D_stack_reg" %}
12506 opcode(0xDD); /* DD /0, FLD m64real */
12507 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
12508 Pop_Reg_D(dst) );
12509 ins_pipe( fpu_reg_mem );
12510 %}
12511
12512
12513 instruct MoveL2D_stack_reg_sse(regXD dst, stackSlotL src) %{
12514 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
12515 match(Set dst (MoveL2D src));
12516 effect(DEF dst, USE src);
12517
12518 ins_cost(95);
12519 format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %}
12520 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x10), RegMem(dst,src));
12521 ins_pipe( pipe_slow );
12522 %}
12523
// SSE2 path when UseXmmLoadAndClearUpper is off: MOVLPD (66 0F 12) writes
// only the low qword of $dst, leaving the upper half untouched (the partial
// register update this rule is named for).
12524 instruct MoveL2D_stack_reg_sse_partial(regXD dst, stackSlotL src) %{
12525 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
12526 match(Set dst (MoveL2D src));
12527 effect(DEF dst, USE src);
12528
12529 ins_cost(95);
// Fix: the disassembly tag previously said "MoveL2D_stack_reg_sse", which is
// the sibling rule above; label this rule with its own name.
12530 format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse_partial" %}
12531 ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x12), RegMem(dst,src));
12532 ins_pipe( pipe_slow );
12533 %}
12534
// Raw-bits move of a long register pair into an XMM double: MOVD each half,
// then PUNPCKLDQ interleaves the low dwords so $dst = hi:lo.  $dst is TEMP
// because it is built up incrementally.
12535 instruct MoveL2D_reg_reg_sse(regXD dst, eRegL src, regXD tmp) %{
12536 predicate(UseSSE>=2);
12537 match(Set dst (MoveL2D src));
12538 effect(TEMP dst, USE src, TEMP tmp);
12539 ins_cost(85);
12540 format %{ "MOVD $dst,$src.lo\n\t"
12541 "MOVD $tmp,$src.hi\n\t"
12542 "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
12543 ins_encode( MovL2XD_reg(dst, src, tmp) );
12544 ins_pipe( pipe_slow );
12545 %}
12546
12547 // Replicate scalar to packed byte (1 byte) values in xmm
// PUNPCKLBW duplicates each low byte into a word; PSHUFLW 0x00 then
// broadcasts word 0 across the low qword, yielding 8 copies of the byte.
12548 instruct Repl8B_reg(regXD dst, regXD src) %{
12549 predicate(UseSSE>=2);
12550 match(Set dst (Replicate8B src));
12551 format %{ "MOVDQA $dst,$src\n\t"
12552 "PUNPCKLBW $dst,$dst\n\t"
12553 "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
12554 ins_encode( pshufd_8x8(dst, src));
12555 ins_pipe( pipe_slow );
12556 %}
12557
12558 // Replicate scalar to packed byte (1 byte) values in xmm
// Same as Repl8B_reg but the scalar starts in a GPR: MOVD transfers it to
// the XMM register before the unpack/shuffle broadcast.
12559 instruct Repl8B_eRegI(regXD dst, eRegI src) %{
12560 predicate(UseSSE>=2);
12561 match(Set dst (Replicate8B src));
12562 format %{ "MOVD $dst,$src\n\t"
12563 "PUNPCKLBW $dst,$dst\n\t"
12564 "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
12565 ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst));
12566 ins_pipe( pipe_slow );
12567 %}
12568
12569 // Replicate scalar zero to packed byte (1 byte) values in xmm
// A vector of zero bytes is just an all-zero register: PXOR dst,dst.
12570 instruct Repl8B_immI0(regXD dst, immI0 zero) %{
12571 predicate(UseSSE>=2);
12572 match(Set dst (Replicate8B zero));
12573 format %{ "PXOR $dst,$dst\t! replicate8B" %}
12574 ins_encode( pxor(dst, dst));
12575 ins_pipe( fpu_reg_reg );
12576 %}
12577
12578 // Replicate scalar to packed short (2 byte) values in xmm
// PSHUFLW immediate 0x00 broadcasts word 0 of $src into all 4 low words.
12579 instruct Repl4S_reg(regXD dst, regXD src) %{
12580 predicate(UseSSE>=2);
12581 match(Set dst (Replicate4S src));
12582 format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
12583 ins_encode( pshufd_4x16(dst, src));
12584 ins_pipe( fpu_reg_reg );
12585 %}
12586
12587 // Replicate scalar to packed short (2 byte) values in xmm
// GPR source variant: MOVD into the XMM register, then broadcast word 0.
12588 instruct Repl4S_eRegI(regXD dst, eRegI src) %{
12589 predicate(UseSSE>=2);
12590 match(Set dst (Replicate4S src));
12591 format %{ "MOVD $dst,$src\n\t"
12592 "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
12593 ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
12594 ins_pipe( fpu_reg_reg );
12595 %}
12596
12597 // Replicate scalar zero to packed short (2 byte) values in xmm
// All-zero vector via PXOR dst,dst.
12598 instruct Repl4S_immI0(regXD dst, immI0 zero) %{
12599 predicate(UseSSE>=2);
12600 match(Set dst (Replicate4S zero));
12601 format %{ "PXOR $dst,$dst\t! replicate4S" %}
12602 ins_encode( pxor(dst, dst));
12603 ins_pipe( fpu_reg_reg );
12604 %}
12605
12606 // Replicate scalar to packed char (2 byte) values in xmm
// Identical machinery to Repl4S_reg; chars are 16-bit on this platform.
12607 instruct Repl4C_reg(regXD dst, regXD src) %{
12608 predicate(UseSSE>=2);
12609 match(Set dst (Replicate4C src));
12610 format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
12611 ins_encode( pshufd_4x16(dst, src));
12612 ins_pipe( fpu_reg_reg );
12613 %}
12614
12615 // Replicate scalar to packed char (2 byte) values in xmm
// GPR source variant: MOVD then broadcast word 0 via PSHUFLW.
12616 instruct Repl4C_eRegI(regXD dst, eRegI src) %{
12617 predicate(UseSSE>=2);
12618 match(Set dst (Replicate4C src));
12619 format %{ "MOVD $dst,$src\n\t"
12620 "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
12621 ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
12622 ins_pipe( fpu_reg_reg );
12623 %}
12624
12625 // Replicate scalar zero to packed char (2 byte) values in xmm
// All-zero vector via PXOR dst,dst.
12626 instruct Repl4C_immI0(regXD dst, immI0 zero) %{
12627 predicate(UseSSE>=2);
12628 match(Set dst (Replicate4C zero));
12629 format %{ "PXOR $dst,$dst\t! replicate4C" %}
12630 ins_encode( pxor(dst, dst));
12631 ins_pipe( fpu_reg_reg );
12632 %}
12633
12634 // Replicate scalar to packed integer (4 byte) values in xmm
// PSHUFD 0x00 broadcasts dword 0 of $src to all four dwords of $dst.
12635 instruct Repl2I_reg(regXD dst, regXD src) %{
12636 predicate(UseSSE>=2);
12637 match(Set dst (Replicate2I src));
12638 format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
12639 ins_encode( pshufd(dst, src, 0x00));
12640 ins_pipe( fpu_reg_reg );
12641 %}
12642
12643 // Replicate scalar to packed integer (4 byte) values in xmm
// GPR source variant: MOVD into XMM, then PSHUFD 0x00 broadcast.
12644 instruct Repl2I_eRegI(regXD dst, eRegI src) %{
12645 predicate(UseSSE>=2);
12646 match(Set dst (Replicate2I src));
12647 format %{ "MOVD $dst,$src\n\t"
12648 "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
12649 ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00));
12650 ins_pipe( fpu_reg_reg );
12651 %}
12652
12653 // Replicate scalar zero to packed integer (4 byte) values in xmm
// All-zero vector via PXOR dst,dst.
12654 instruct Repl2I_immI0(regXD dst, immI0 zero) %{
12655 predicate(UseSSE>=2);
12656 match(Set dst (Replicate2I zero));
12657 format %{ "PXOR $dst,$dst\t! replicate2I" %}
12658 ins_encode( pxor(dst, dst));
12659 ins_pipe( fpu_reg_reg );
12660 %}
12661
12662 // Replicate scalar to packed single precision floating point values in xmm
// PSHUFD 0xe0 selects dwords 0,0,2,3 — duplicating the scalar into the two
// low float lanes while leaving the upper lanes as-is.
12663 instruct Repl2F_reg(regXD dst, regXD src) %{
12664 predicate(UseSSE>=2);
12665 match(Set dst (Replicate2F src));
12666 format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
12667 ins_encode( pshufd(dst, src, 0xe0));
12668 ins_pipe( fpu_reg_reg );
12669 %}
12670
12671 // Replicate scalar to packed single precision floating point values in xmm
// Same as Repl2F_reg but matching a single-precision (regX) source operand.
12672 instruct Repl2F_regX(regXD dst, regX src) %{
12673 predicate(UseSSE>=2);
12674 match(Set dst (Replicate2F src));
12675 format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
12676 ins_encode( pshufd(dst, src, 0xe0));
12677 ins_pipe( fpu_reg_reg );
12678 %}
12679
12680 // Replicate scalar zero to packed single precision floating point values in xmm
// All-zero vector via PXOR dst,dst.
12681 instruct Repl2F_immXF0(regXD dst, immXF0 zero) %{
12682 predicate(UseSSE>=2);
12683 match(Set dst (Replicate2F zero));
12684 format %{ "PXOR $dst,$dst\t! replicate2F" %}
12685 ins_encode( pxor(dst, dst));
12686 ins_pipe( fpu_reg_reg );
12687 %}
12688
12689 // =======================================================================
12690 // fast clearing of an array
// The count arrives in ECX in 8-byte units; SHL ECX,1 doubles it to the
// number of 4-byte stores for REP STOSD (F3 AB).  EAX supplies the zero
// store value.  ECX/EDI/EAX and flags are all clobbered (effect clause).
12691 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
12692 match(Set dummy (ClearArray cnt base));
12693 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
12694 format %{ "SHL ECX,1\t# Convert doublewords to words\n\t"
12695 "XOR EAX,EAX\n\t"
12696 "REP STOS\t# store EAX into [EDI++] while ECX--" %}
12697 opcode(0,0x4);
12698 ins_encode( Opcode(0xD1), RegOpc(ECX),
12699 OpcRegReg(0x33,EAX,EAX),
12700 Opcode(0xF3), Opcode(0xAB) );
12701 ins_pipe( pipe_slow );
12702 %}
12703
12704 instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
12705 eAXRegI result, regXD tmp1, eFlagsReg cr) %{
|
264 return operand;
265 }
266
267 // Buffer for 128-bits masks used by SSE instructions.
// One extra 128-bit slot is reserved so double_quadword can hand back
// 16-byte-aligned pointers regardless of the array's own alignment.
268 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
269
270 // Static initialization during VM startup.
// Masks for absolute value (clear sign bit) and sign flip (xor sign bit)
// of float and double lanes respectively.
271 static jlong *float_signmask_pool = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
272 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
273 static jlong *float_signflip_pool = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
274 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
275
276 // Offset hacking within calls.
// Extra bytes emitted ahead of a call when this compile runs in 24-bit FP
// precision mode: a 6-byte FLDCW (presumably switching precision around the
// call — confirm against the call encoders).
277 static int pre_call_FPU_size() {
278 if (Compile::current()->in_24_bit_fp_mode())
279 return 6; // fldcw
280 return 0;
281 }
282
283 static int preserve_SP_size() {
// Size in bytes of the SP-preserving reg-reg move emitted before
// method-handle invokes: opcode + ModRM on 32-bit x86.
284 return 2; // op, rm(reg/reg)
285 }
286
287 // !!!!! Special hack to get all type of calls to specify the byte offset
288 // from the start of the call to the point where the return address
289 // will point.
290 int MachCallStaticJavaNode::ret_addr_offset() {
291 int offset = 5 + pre_call_FPU_size(); // 5 bytes from start of call to where return address points
// Method-handle invokes emit an extra SP-preserving move before the call.
292 if (_method_handle_invoke)
293 offset += preserve_SP_size();
294 return offset;
295 }
296
297 int MachCallDynamicJavaNode::ret_addr_offset() {
// 10 bytes precede the return address (presumably the inline-cache MOV plus
// the 5-byte CALL — confirm against the Java_Dynamic_Call encoder).
298 return 10 + pre_call_FPU_size(); // 10 bytes from start of call to where return address points
299 }
300
// Filled in when the FFree_Float_Stack_All sequence is first emitted;
// -1 means "not emitted yet" (asserted by MachCallRuntimeNode below).
301 static int sizeof_FFree_Float_Stack_All = -1;
302
303 int MachCallRuntimeNode::ret_addr_offset() {
304 assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
478 if ( displace_is_oop ) {
479 emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
480 } else {
481 emit_d32 (cbuf, displace);
482 }
483 }
484 }
485 }
486 }
487
488
489 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
490 if( dst_encoding == src_encoding ) {
491 // reg-reg copy, use an empty encoding
492 } else {
493 emit_opcode( cbuf, 0x8B );
494 emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
495 }
496 }
497
// Post-compare fixup after COMISS/UCOMISS: when the compare was unordered
// (PF set, i.e. a NaN operand), patch EFLAGS on the stack so the result
// reads as 'less than'.  Ordered compares skip the fixup entirely.
498 void emit_cmpfp_fixup(MacroAssembler& _masm) {
499 Label exit;
500 __ jccb(Assembler::noParity, exit);
501 __ pushf();
502 //
503 // comiss/ucomiss instructions set ZF,PF,CF flags and
504 // zero OF,AF,SF for NaN values.
505 // Fixup flags by zeroing ZF,PF so that compare of NaN
506 // values returns 'less than' result (CF is set).
507 // Leave the rest of flags unchanged.
508 //
509 // 7 6 5 4 3 2 1 0
510 // |S|Z|r|A|r|P|r|C| (r - reserved bit)
511 // 0 0 1 0 1 0 1 1 (0x2B)
512 //
513 __ andl(Address(rsp, 0), 0xffffff2b);
514 __ popf();
515 __ bind(exit);
516 }
517
// Materialize a three-way FP compare result in dst from the flags left by a
// preceding COMISS/UCOMISS: -1 for 'less' or unordered (parity), 0 for
// equal, 1 for greater.  SETB/MOVZBL produce the 0/1 in the fall-through case.
518 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
519 Label done;
520 __ movl(dst, -1);
521 __ jcc(Assembler::parity, done);
522 __ jcc(Assembler::below, done);
523 __ setb(Assembler::notEqual, dst);
524 __ movzbl(dst, dst);
525 __ bind(done);
526 }
527
528
529 //=============================================================================
// The constant table is addressed absolutely on x86_32 (see
// calculate_table_base_offset below), so the base node defines no register.
530 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
531
// Constants are reached by absolute address on x86_32, so the table base
// needs no relative offset.
532 int Compile::ConstantTable::calculate_table_base_offset() const {
533 return 0; // absolute addressing, no offset
534 }
535
// No code is needed to establish the constant-table base under absolute
// addressing (matches size() == 0 below).
536 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
537 // Empty encoding
538 }
539
// The base node emits nothing (see emit above), so its size is zero bytes.
540 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
541 return 0;
542 }
543
544 #ifndef PRODUCT
545 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
795 emit_opcode (*cbuf, opcode );
796 encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, false);
797 #ifndef PRODUCT
798 } else if( !do_size ) {
799 if( size != 0 ) st->print("\n\t");
800 if( opcode == 0x8B || opcode == 0x89 ) { // MOV
801 if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
802 else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
803 } else { // FLD, FST, PUSH, POP
804 st->print("%s [ESP + #%d]",op_str,offset);
805 }
806 #endif
807 }
808 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
809 return size+3+offset_size;
810 }
811
812 // Helper for XMM registers. Extra opcode bits, limited syntax.
// Spill/fill an XMM register to/from [ESP + offset].  Three modes share one
// body: emit code when cbuf != NULL, print assembly when only st is given,
// and in all cases return the cumulative encoding size in bytes.
// reg_lo+1 == reg_hi distinguishes a 64-bit double from a 32-bit float.
813 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
814 int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
815 if (cbuf) {
816 MacroAssembler _masm(cbuf);
817 if (reg_lo+1 == reg_hi) { // double move?
818 if (is_load) {
819 __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
820 } else {
821 __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
822 }
823 } else {
824 if (is_load) {
825 __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
826 } else {
827 __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
828 }
829 }
830 #ifndef PRODUCT
831 } else if (!do_size) {
832 if (size != 0) st->print("\n\t");
833 if (reg_lo+1 == reg_hi) { // double move?
834 if (is_load) st->print("%s %s,[ESP + #%d]",
835 UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
836 Matcher::regName[reg_lo], offset);
837 else st->print("MOVSD [ESP + #%d],%s",
838 offset, Matcher::regName[reg_lo]);
839 } else {
840 if (is_load) st->print("MOVSS %s,[ESP + #%d]",
841 Matcher::regName[reg_lo], offset);
842 else st->print("MOVSS [ESP + #%d],%s",
843 offset, Matcher::regName[reg_lo]);
844 }
845 #endif
846 }
// Displacement encodes in 0, 1 (disp8) or 4 (disp32) bytes.
847 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
848 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes.
849 return size+5+offset_size;
850 }
851
852
// XMM-to-XMM register move for spills: movdbl/movflt chosen by whether the
// register pair denotes a double (src_lo+1 == src_hi).  Like impl_x_helper,
// it either emits, prints, or just sizes, returning the cumulative size.
853 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
854 int src_hi, int dst_hi, int size, outputStream* st ) {
855 if (cbuf) {
856 MacroAssembler _masm(cbuf);
857 if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
858 __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
859 as_XMMRegister(Matcher::_regEncode[src_lo]));
860 } else {
861 __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
862 as_XMMRegister(Matcher::_regEncode[src_lo]));
863 }
864 #ifndef PRODUCT
865 } else if (!do_size) {
866 if (size != 0) st->print("\n\t");
867 if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
868 if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
869 st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
870 } else {
871 st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
872 }
873 } else {
874 if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
875 st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
876 } else {
877 st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
878 }
879 }
880 #endif
881 }
882 // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes.
883 // Only MOVAPS SSE prefix uses 1 byte.
884 int sz = 4;
885 if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
886 UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
887 return size + sz;
888 }
889
// Spill move from a general-purpose register into an XMM register (MOVD).
// Emits when cbuf is given, prints otherwise; always 4 bytes of encoding.
890 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
891 int src_hi, int dst_hi, int size, outputStream* st ) {
892 // 32-bit
893 if (cbuf) {
894 MacroAssembler _masm(cbuf);
895 __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
896 as_Register(Matcher::_regEncode[src_lo]));
897 #ifndef PRODUCT
898 } else if (!do_size) {
899 st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
900 #endif
901 }
902 return 4;
903 }
904
905
// Spill move from an XMM register into a general-purpose register (MOVD),
// the mirror of impl_movgpr2x_helper; fixed 4-byte encoding.
906 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
907 int src_hi, int dst_hi, int size, outputStream* st ) {
908 // 32-bit
909 if (cbuf) {
910 MacroAssembler _masm(cbuf);
911 __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
912 as_XMMRegister(Matcher::_regEncode[src_lo]));
913 #ifndef PRODUCT
914 } else if (!do_size) {
915 st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
916 #endif
917 }
918 return 4;
919 }
920
// GPR-to-GPR move (MOV r32,r/m32 = 0x8B): emit or print, then return the
// cumulative size (opcode + ModRM = 2 bytes).
921 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
922 if( cbuf ) {
923 emit_opcode(*cbuf, 0x8B );
924 emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
925 #ifndef PRODUCT
926 } else if( !do_size ) {
927 if( size != 0 ) st->print("\n\t");
928 st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
929 #endif
930 }
931 return size+2;
932 }
1929 // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine
1930 // who we intended to call.
1931 cbuf.set_insts_mark();
1932 $$$emit8$primary;
1933 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1934 virtual_call_Relocation::spec(virtual_call_oop_addr), RELOC_IMM32 );
1935 %}
1936
1937 enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL
// Indirect CALL through the method's compiled entry point at a small
// displacement off EAX; the assert guarantees the disp8 encoding fits.
1938 int disp = in_bytes(methodOopDesc::from_compiled_offset());
1939 assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
1940
1941 // CALL *[EAX+in_bytes(methodOopDesc::from_compiled_code_entry_point_offset())]
1942 cbuf.set_insts_mark();
1943 $$$emit8$primary;
1944 emit_rm(cbuf, 0x01, $secondary, EAX_enc ); // R/M byte
1945 emit_d8(cbuf, disp); // Displacement
1946
1947 %}
1948
1949 // Following encoding is no longer used, but may be restored if calling
1950 // convention changes significantly.
1951 // Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1952 //
1953 // enc_class Java_Interpreter_Call (label labl) %{ // JAVA INTERPRETER CALL
1954 // // int ic_reg = Matcher::inline_cache_reg();
1955 // // int ic_encode = Matcher::_regEncode[ic_reg];
1956 // // int imo_reg = Matcher::interpreter_method_oop_reg();
1957 // // int imo_encode = Matcher::_regEncode[imo_reg];
1958 //
1959 // // // Interpreter expects method_oop in EBX, currently a callee-saved register,
1960 // // // so we load it immediately before the call
1961 // // emit_opcode(cbuf, 0x8B); // MOV imo_reg,ic_reg # method_oop
1962 // // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1963 //
1964 // // xor rbp,ebp
1965 // emit_opcode(cbuf, 0x33);
1966 // emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1967 //
1968 // // CALL to interpreter.
2006 emit_d32(cbuf, src_con);
2007 }
2008 %}
2009
2010 enc_class LdImmL_Hi( eRegL dst, immL src) %{ // Load Immediate
2011 // Load immediate does not have a zero or sign extended version
2012 // for 8-bit immediates
// Loads the high 32 bits of the long constant into the pair's high half
// (register encoding = low + 2); a zero high word becomes XOR dst,dst.
2013 int dst_enc = $dst$$reg + 2;
2014 int src_con = ((julong)($src$$constant)) >> 32;
2015 if (src_con == 0) {
2016 // xor dst, dst
2017 emit_opcode(cbuf, 0x33);
2018 emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2019 } else {
2020 emit_opcode(cbuf, $primary + dst_enc);
2021 emit_d32(cbuf, src_con);
2022 }
2023 %}
2024
2025
2026 // Encode a reg-reg copy. If it is useless, then empty encoding.
2027 enc_class enc_Copy( eRegI dst, eRegI src ) %{
2028 encode_Copy( cbuf, $dst$$reg, $src$$reg );
2029 %}
2030
// Copy only the low half of a long pair into an int register.
2031 enc_class enc_CopyL_Lo( eRegI dst, eRegL src ) %{
2032 encode_Copy( cbuf, $dst$$reg, $src$$reg );
2033 %}
2034
// ModRM byte only (mod=11, reg-reg); the opcode is emitted separately.
2035 enc_class RegReg (eRegI dst, eRegI src) %{ // RegReg(Many)
2036 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2037 %}
2038
// Primary opcode + reg-reg ModRM operating on the low halves of two longs.
2039 enc_class RegReg_Lo(eRegL dst, eRegL src) %{ // RegReg(Many)
2040 $$$emit8$primary;
2041 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2042 %}
2043
// Secondary opcode + reg-reg ModRM operating on the high halves of two longs.
2044 enc_class RegReg_Hi(eRegL dst, eRegL src) %{ // RegReg(Many)
2045 $$$emit8$secondary;
2046 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2047 %}
2048
// Low-half reg-reg ModRM with no opcode byte (opcode emitted elsewhere).
2049 enc_class RegReg_Lo2(eRegL dst, eRegL src) %{ // RegReg(Many)
2050 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2051 %}
2052
2053 enc_class RegReg_Hi2(eRegL dst, eRegL src) %{ // RegReg(Many)
2054 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2564
2565
// Push $dst onto the x87 stack (FLD ST(i)) and, when $src is not already
// FPR1, rotate the stack (fincstp / FXCH / fdecstp) so $src is addressable
// as FPR1 for the modifying operation that follows.
2566 enc_class Push_Reg_Mod_D( regD dst, regD src) %{
2567 // load dst in FPR0
2568 emit_opcode( cbuf, 0xD9 );
2569 emit_d8( cbuf, 0xC0-1+$dst$$reg );
2570 if ($src$$reg != FPR1L_enc) {
2571 // fincstp
2572 emit_opcode (cbuf, 0xD9);
2573 emit_opcode (cbuf, 0xF7);
2574 // swap src with FPR1:
2575 // FXCH FPR1 with src
2576 emit_opcode(cbuf, 0xD9);
2577 emit_d8(cbuf, 0xC8-1+$src$$reg );
2578 // fdecstp
2579 emit_opcode (cbuf, 0xD9);
2580 emit_opcode (cbuf, 0xF6);
2581 }
2582 %}
2583
// Transfer two XMM doubles onto the x87 stack via a scratch stack slot:
// after this, src0 is at ST(0) and src1 at ST(1).  The 8 bytes reserved
// here are released later (see Push_ResultXD / pop_stack_temp_qword).
2584 enc_class Push_ModD_encoding(regXD src0, regXD src1) %{
2585 MacroAssembler _masm(&cbuf);
2586 __ subptr(rsp, 8);
2587 __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
2588 __ fld_d(Address(rsp, 0));
2589 __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
2590 __ fld_d(Address(rsp, 0));
2591 %}
2592
// Single-precision analogue of Push_ModD_encoding: route two XMM floats
// through a 4-byte stack slot onto the x87 stack (src0 ends at ST(0)).
2593 enc_class Push_ModX_encoding(regX src0, regX src1) %{
2594 MacroAssembler _masm(&cbuf);
2595 __ subptr(rsp, 4);
2596 __ movflt(Address(rsp, 0), $src1$$XMMRegister);
2597 __ fld_s(Address(rsp, 0));
2598 __ movflt(Address(rsp, 0), $src0$$XMMRegister);
2599 __ fld_s(Address(rsp, 0));
2600 %}
2601
// Move the x87 result (ST(0)) back into an XMM double through the scratch
// stack slot, then release the 8 bytes reserved earlier.
2602 enc_class Push_ResultXD(regXD dst) %{
2603 MacroAssembler _masm(&cbuf);
2604 __ fstp_d(Address(rsp, 0));
2605 __ movdbl($dst$$XMMRegister, Address(rsp, 0));
2606 __ addptr(rsp, 8);
2607 %}
2608
// Single-precision result transfer from ST(0) to an XMM register; the
// stack adjustment is parameterized since callers reserve 4 or 8 bytes.
2609 enc_class Push_ResultX(regX dst, immI d8) %{
2610 MacroAssembler _masm(&cbuf);
2611 __ fstp_s(Address(rsp, 0));
2612 __ movflt($dst$$XMMRegister, Address(rsp, 0));
2613 __ addptr(rsp, $d8$$constant);
2614 %}
2615
// Push one XMM double onto the x87 stack via a freshly reserved 8-byte slot
// (slot stays allocated; released by a matching pop/result encoding).
2616 enc_class Push_SrcXD(regXD src) %{
2617 MacroAssembler _masm(&cbuf);
2618 __ subptr(rsp, 8);
2619 __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2620 __ fld_d(Address(rsp, 0));
2621 %}
2622
// Reserve an 8-byte scratch slot on the stack for x87/XMM transfers.
2623 enc_class push_stack_temp_qword() %{
2624 MacroAssembler _masm(&cbuf);
2625 __ subptr(rsp, 8);
2626 %}
2627
// Release the 8-byte scratch slot reserved by push_stack_temp_qword.
2628 enc_class pop_stack_temp_qword() %{
2629 MacroAssembler _masm(&cbuf);
2630 __ addptr(rsp, 8);
2631 %}
2632
// Copy an XMM double onto the x87 stack using the already-reserved scratch
// slot at [rsp+0] (caller must have pushed a temp qword first).
2633 enc_class push_xmm_to_fpr1(regXD src) %{
2634 MacroAssembler _masm(&cbuf);
2635 __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2636 __ fld_d(Address(rsp, 0));
2637 %}
2638
2639 // Compute X^Y using Intel's fast hardware instructions, if possible.
2640 // Otherwise return a NaN.
2641 enc_class pow_exp_core_encoding %{
2642 // FPR1 holds Y*ln2(X). Compute FPR1 = 2^(Y*ln2(X))
2643 emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xC0); // fdup = fld st(0) Q Q
2644 emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xFC); // frndint int(Q) Q
2645 emit_opcode(cbuf,0xDC); emit_opcode(cbuf,0xE9); // fsub st(1) -= st(0); int(Q) frac(Q)
2646 emit_opcode(cbuf,0xDB); // FISTP [ESP] frac(Q)
2647 emit_opcode(cbuf,0x1C);
2648 emit_d8(cbuf,0x24);
2649 emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xF0); // f2xm1 2^frac(Q)-1
2650 emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xE8); // fld1 1 2^frac(Q)-1
2651 emit_opcode(cbuf,0xDE); emit_opcode(cbuf,0xC1); // faddp 2^frac(Q)
2652 emit_opcode(cbuf,0x8B); // mov rax,[esp+0]=int(Q)
2653 encode_RegMem(cbuf, EAX_enc, ESP_enc, 0x4, 0, 0, false);
2654 emit_opcode(cbuf,0xC7); // mov rcx,0xFFFFF800 - overflow mask
2655 emit_rm(cbuf, 0x3, 0x0, ECX_enc);
2656 emit_d32(cbuf,0xFFFFF800);
2795 emit_opcode( cbuf, 0x7A );
2796 emit_d8 ( cbuf, 0x13 );
2797 // movl(dst, less_result);
2798 emit_opcode( cbuf, 0xB8 + $dst$$reg);
2799 emit_d32( cbuf, -1 );
2800 // jcc(Assembler::below, exit);
2801 emit_opcode( cbuf, 0x72 );
2802 emit_d8 ( cbuf, 0x0C );
2803 // movl(dst, equal_result);
2804 emit_opcode( cbuf, 0xB8 + $dst$$reg);
2805 emit_d32( cbuf, 0 );
2806 // jcc(Assembler::equal, exit);
2807 emit_opcode( cbuf, 0x74 );
2808 emit_d8 ( cbuf, 0x05 );
2809 // movl(dst, greater_result);
2810 emit_opcode( cbuf, 0xB8 + $dst$$reg);
2811 emit_d32( cbuf, 1 );
2812 %}
2813
2814
2815 // Compare the longs and set flags
2816 // BROKEN! Do Not use as-is
// (the low-word CMP only executes when the high words are equal, and its
// unsigned flags do not compose with the signed high-word compare —
// presumably why the original authors flagged this encoding as broken)
2817 enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
2818 // CMP $src1.hi,$src2.hi
2819 emit_opcode( cbuf, 0x3B );
2820 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2821 // JNE,s done
2822 emit_opcode(cbuf,0x75);
2823 emit_d8(cbuf, 2 );
2824 // CMP $src1.lo,$src2.lo
2825 emit_opcode( cbuf, 0x3B );
2826 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2827 // done:
2828 %}
2829
2830 enc_class convert_int_long( regL dst, eRegI src ) %{
2831 // mov $dst.lo,$src
2832 int dst_encoding = $dst$$reg;
2833 int src_encoding = $src$$reg;
2834 encode_Copy( cbuf, dst_encoding , src_encoding );
3017 emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
3018 // CMP $tmp,$src.lo
3019 emit_opcode( cbuf, 0x3B );
3020 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
3021 // SBB $tmp,$src.hi
3022 emit_opcode( cbuf, 0x1B );
3023 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
3024 %}
3025
3026 // Sniff, sniff... smells like Gnu Superoptimizer
// 64-bit negate in a register pair: NEG both halves independently, then
// SBB hi,0 subtracts the borrow generated by a non-zero low word.
3027 enc_class neg_long( eRegL dst ) %{
3028 emit_opcode(cbuf,0xF7); // NEG hi
3029 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
3030 emit_opcode(cbuf,0xF7); // NEG lo
3031 emit_rm (cbuf,0x3, 0x3, $dst$$reg );
3032 emit_opcode(cbuf,0x83); // SBB hi,0
3033 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
3034 emit_d8 (cbuf,0 );
3035 %}
3036
3037
3038 // Because the transitions from emitted code to the runtime
3039 // monitorenter/exit helper stubs are so slow it's critical that
3040 // we inline both the stack-locking fast-path and the inflated fast path.
3041 //
3042 // See also: cmpFastLock and cmpFastUnlock.
3043 //
3044 // What follows is a specialized inline transliteration of the code
3045 // in slow_enter() and slow_exit(). If we're concerned about I$ bloat
3046 // another option would be to emit TrySlowEnter and TrySlowExit methods
3047 // at startup-time. These methods would accept arguments as
3048 // (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
3049 // indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply
3050 // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
3051 // In practice, however, the # of lock sites is bounded and is usually small.
3052 // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
3053 // if the processor uses simple bimodal branch predictors keyed by EIP
3054 // Since the helper routines would be called from multiple synchronization
3055 // sites.
3056 //
3057 // An even better approach would be write "MonitorEnter()" and "MonitorExit()"
3655 emit_opcode(cbuf,0x5A); // POP EDX
3656 emit_opcode(cbuf,0x81); // CMP EDX,imm
3657 emit_d8 (cbuf,0xFA); // rdx
3658 emit_d32 (cbuf,0x80000000); // 0x80000000
3659 emit_opcode(cbuf,0x75); // JNE around_slow_call
3660 emit_d8 (cbuf,0x07+4); // Size of slow_call
3661 emit_opcode(cbuf,0x85); // TEST EAX,EAX
3662 emit_opcode(cbuf,0xC0); // 2/rax,/rax,
3663 emit_opcode(cbuf,0x75); // JNE around_slow_call
3664 emit_d8 (cbuf,0x07); // Size of slow_call
3665 // Push src onto stack slow-path
3666 emit_opcode(cbuf,0xD9 ); // FLD ST(i)
3667 emit_d8 (cbuf,0xC0-1+$src$$reg );
3668 // CALL directly to the runtime
3669 cbuf.set_insts_mark();
3670 emit_opcode(cbuf,0xE8); // Call into runtime
3671 emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3672 // Carry on here...
3673 %}
3674
// Multiply the x87 stack top by register ST(i): FMUL ST,ST(i) = D8 C8+i.
3675 enc_class FMul_ST_reg( eRegF src1 ) %{
3676 // Operand was loaded from memory into fp ST (stack top)
3677 // FMUL ST,$src /* D8 C8+i */
3678 emit_opcode(cbuf, 0xD8);
3679 emit_opcode(cbuf, 0xC8 + $src1$$reg);
3680 %}
3681
// Add register ST(i) into the x87 stack top: FADD ST,ST(i) = D8 C0+i
// (non-popping, despite the FADDP in the original comment's mnemonic).
3682 enc_class FAdd_ST_reg( eRegF src2 ) %{
3683 // FADDP ST,src2 /* D8 C0+i */
3684 emit_opcode(cbuf, 0xD8);
3685 emit_opcode(cbuf, 0xC0 + $src2$$reg);
3686 //could use FADDP src2,fpST /* DE C0+i */
3687 %}
3688
// Popping add: FADDP ST(i),ST = DE C0+i — adds ST into ST(i), then pops.
3689 enc_class FAddP_reg_ST( eRegF src2 ) %{
3690 // FADDP src2,ST /* DE C0+i */
3691 emit_opcode(cbuf, 0xDE);
3692 emit_opcode(cbuf, 0xC0 + $src2$$reg);
3693 %}
3694
3722 emit_opcode(cbuf, 0xC0 + $src1$$reg);
3723
3724 // FMULP src2,ST /* DE C8+i */
3725 emit_opcode(cbuf, 0xDE);
3726 emit_opcode(cbuf, 0xC8 + $src2$$reg);
3727 %}
3728
3729 // Atomically load the volatile long
// FILD (DF /5) reads the 64 bits from memory in one access; FISTP (DF /7,
// via store_to_stackslot) deposits them into the long stack slot.
3730 enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
3731 emit_opcode(cbuf,0xDF);
3732 int rm_byte_opcode = 0x05;
3733 int base = $mem$$base;
3734 int index = $mem$$index;
3735 int scale = $mem$$scale;
3736 int displace = $mem$$disp;
3737 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
3738 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
3739 store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
3740 %}
3741
3742 // Volatile Store Long. Must be atomic, so move it into
3743 // the FP TOS and then do a 64-bit FIST. Has to probe the
3744 // target address before the store (for null-ptr checks)
3745 // so the memory operand is used twice in the encoding.
// FILD (DF /5) from the stack slot, then FISTP (DF /7) writes all 64 bits
// to the target in one access; the insts_mark covers the faulting FISTP.
3746 enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
3747 store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
3748 cbuf.set_insts_mark(); // Mark start of FIST in case $mem has an oop
3749 emit_opcode(cbuf,0xDF);
3750 int rm_byte_opcode = 0x07;
3751 int base = $mem$$base;
3752 int index = $mem$$index;
3753 int scale = $mem$$scale;
3754 int displace = $mem$$disp;
3755 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
3756 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
3757 %}
3758
3759 // Safepoint Poll. This polls the safepoint page, and causes an
3760 // exception if it is not readable. Unfortunately, it kills the condition code
3761 // in the process
3762 // We currently use TESTL [spp],EDI
3763 // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0
3764
// TEST r/m32,EDI against the absolute polling-page address; the relocation
// marks it as a poll site for the safepoint machinery.
3765 enc_class Safepoint_Poll() %{
3766 cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
3767 emit_opcode(cbuf,0x85);
3768 emit_rm (cbuf, 0x0, 0x7, 0x5);
3769 emit_d32(cbuf, (intptr_t)os::get_polling_page());
3770 %}
3771 %}
3772
3773
3774 //----------FRAME--------------------------------------------------------------
3775 // Definition of frame structure and management information.
3776 //
3777 // S T A C K L A Y O U T Allocators stack-slot number
3778 // | (to get allocators register number
6303 // then store it down to the stack and reload on the int
6304 // side.
// x87 path: atomic 64-bit load via FILD/FISTP into a long stack slot.
6305 instruct loadL_volatile(stackSlotL dst, memory mem) %{
6306 predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
6307 match(Set dst (LoadL mem));
6308
6309 ins_cost(200);
6310 format %{ "FILD $mem\t# Atomic volatile long load\n\t"
6311 "FISTp $dst" %}
6312 ins_encode(enc_loadL_volatile(mem,dst));
6313 ins_pipe( fpu_reg_mem );
6314 %}
6315
// SSE2 path: MOVSD gives an atomic 64-bit read; the value is then parked
// in a long stack slot via the XMM temp.
6316 instruct loadLX_volatile(stackSlotL dst, memory mem, regXD tmp) %{
6317 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
6318 match(Set dst (LoadL mem));
6319 effect(TEMP tmp);
6320 ins_cost(180);
6321 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
6322 "MOVSD $dst,$tmp" %}
6323 ins_encode %{
6324 __ movdbl($tmp$$XMMRegister, $mem$$Address);
6325 __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
6326 %}
6327 ins_pipe( pipe_slow );
6328 %}
6329
// SSE2 path into a register pair: one atomic MOVSD, then split the 64 bits
// with MOVD (low dword) and PSRLQ+MOVD (high dword).
6330 instruct loadLX_reg_volatile(eRegL dst, memory mem, regXD tmp) %{
6331 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
6332 match(Set dst (LoadL mem));
6333 effect(TEMP tmp);
6334 ins_cost(160);
6335 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
6336 "MOVD $dst.lo,$tmp\n\t"
6337 "PSRLQ $tmp,32\n\t"
6338 "MOVD $dst.hi,$tmp" %}
6339 ins_encode %{
6340 __ movdbl($tmp$$XMMRegister, $mem$$Address);
6341 __ movdl($dst$$Register, $tmp$$XMMRegister);
6342 __ psrlq($tmp$$XMMRegister, 32);
6343 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
6344 %}
6345 ins_pipe( pipe_slow );
6346 %}
6347
6348 // Load Range
// Plain 32-bit MOV; array-range (length) loads share the int load encoding.
6349 instruct loadRange(eRegI dst, memory mem) %{
6350 match(Set dst (LoadRange mem));
6351
6352 ins_cost(125);
6353 format %{ "MOV $dst,$mem" %}
6354 opcode(0x8B);
6355 ins_encode( OpcP, RegMem(dst,mem));
6356 ins_pipe( ialu_reg_mem );
6357 %}
6358
6359
6360 // Load Pointer
6361 instruct loadP(eRegP dst, memory mem) %{
6362 match(Set dst (LoadP mem));
6363
6364 ins_cost(125);
6382 // Load Double
// x87 path (UseSSE<=1): FLD m64real (DD /0) then pop into the target
// stack register.
6383 instruct loadD(regD dst, memory mem) %{
6384 predicate(UseSSE<=1);
6385 match(Set dst (LoadD mem));
6386
6387 ins_cost(150);
6388 format %{ "FLD_D ST,$mem\n\t"
6389 "FSTP $dst" %}
6390 opcode(0xDD); /* DD /0 */
6391 ins_encode( OpcP, RMopc_Mem(0x00,mem),
6392 Pop_Reg_D(dst) );
6393 ins_pipe( fpu_reg_mem );
6394 %}
6395
6396 // Load Double to XMM
// MOVSD load (clears the upper XMM half) — used when the CPU handles that
// form well (UseXmmLoadAndClearUpper).
6397 instruct loadXD(regXD dst, memory mem) %{
6398 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
6399 match(Set dst (LoadD mem));
6400 ins_cost(145);
6401 format %{ "MOVSD $dst,$mem" %}
6402 ins_encode %{
6403 __ movdbl ($dst$$XMMRegister, $mem$$Address);
6404 %}
6405 ins_pipe( pipe_slow );
6406 %}
6407
// Partial-register variant used when !UseXmmLoadAndClearUpper: printed as
// MOVLPD (writes only the low qword); movdbl picks the form at assembly time.
6408 instruct loadXD_partial(regXD dst, memory mem) %{
6409 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
6410 match(Set dst (LoadD mem));
6411 ins_cost(145);
6412 format %{ "MOVLPD $dst,$mem" %}
6413 ins_encode %{
6414 __ movdbl ($dst$$XMMRegister, $mem$$Address);
6415 %}
6416 ins_pipe( pipe_slow );
6417 %}
6418
6419 // Load to XMM register (single-precision floating point)
6420 // MOVSS instruction
6421 instruct loadX(regX dst, memory mem) %{
6422 predicate(UseSSE>=1);
6423 match(Set dst (LoadF mem));
6424 ins_cost(145);
6425 format %{ "MOVSS $dst,$mem" %}
6426 ins_encode %{
6427 __ movflt ($dst$$XMMRegister, $mem$$Address);
6428 %}
6429 ins_pipe( pipe_slow );
6430 %}
6431
6432 // Load Float
// x87 path (UseSSE==0): FLD m32real (D9 /0) then pop into the target
// stack register.
6433 instruct loadF(regF dst, memory mem) %{
6434 predicate(UseSSE==0);
6435 match(Set dst (LoadF mem));
6436
6437 ins_cost(150);
6438 format %{ "FLD_S ST,$mem\n\t"
6439 "FSTP $dst" %}
6440 opcode(0xD9); /* D9 /0 */
6441 ins_encode( OpcP, RMopc_Mem(0x00,mem),
6442 Pop_Reg_F(dst) );
6443 ins_pipe( fpu_reg_mem );
6444 %}
6445
6446 // Load Aligned Packed Byte to XMM register
// The packed-vector loads below all use a 64-bit MOVQ into the low half of
// an XMM register; they differ only in the ideal-graph node they match.
6447 instruct loadA8B(regXD dst, memory mem) %{
6448 predicate(UseSSE>=1);
6449 match(Set dst (Load8B mem));
6450 ins_cost(125);
6451 format %{ "MOVQ $dst,$mem\t! packed8B" %}
6452 ins_encode %{
6453 __ movq($dst$$XMMRegister, $mem$$Address);
6454 %}
6455 ins_pipe( pipe_slow );
6456 %}
6457
6458 // Load Aligned Packed Short to XMM register
6459 instruct loadA4S(regXD dst, memory mem) %{
6460 predicate(UseSSE>=1);
6461 match(Set dst (Load4S mem));
6462 ins_cost(125);
6463 format %{ "MOVQ $dst,$mem\t! packed4S" %}
6464 ins_encode %{
6465 __ movq($dst$$XMMRegister, $mem$$Address);
6466 %}
6467 ins_pipe( pipe_slow );
6468 %}
6469
6470 // Load Aligned Packed Char to XMM register
6471 instruct loadA4C(regXD dst, memory mem) %{
6472 predicate(UseSSE>=1);
6473 match(Set dst (Load4C mem));
6474 ins_cost(125);
6475 format %{ "MOVQ $dst,$mem\t! packed4C" %}
6476 ins_encode %{
6477 __ movq($dst$$XMMRegister, $mem$$Address);
6478 %}
6479 ins_pipe( pipe_slow );
6480 %}
6481
6482 // Load Aligned Packed Integer to XMM register
6483 instruct load2IU(regXD dst, memory mem) %{
6484 predicate(UseSSE>=1);
6485 match(Set dst (Load2I mem));
6486 ins_cost(125);
6487 format %{ "MOVQ $dst,$mem\t! packed2I" %}
6488 ins_encode %{
6489 __ movq($dst$$XMMRegister, $mem$$Address);
6490 %}
6491 ins_pipe( pipe_slow );
6492 %}
6493
6494 // Load Aligned Packed Single to XMM
// Slightly higher cost (145 vs 125) than the integer packed loads.
6495 instruct loadA2F(regXD dst, memory mem) %{
6496 predicate(UseSSE>=1);
6497 match(Set dst (Load2F mem));
6498 ins_cost(145);
6499 format %{ "MOVQ $dst,$mem\t! packed2F" %}
6500 ins_encode %{
6501 __ movq($dst$$XMMRegister, $mem$$Address);
6502 %}
6503 ins_pipe( pipe_slow );
6504 %}
6505
6506 // Load Effective Address
// Materializes base+8-bit-offset addressing into a pointer register via LEA;
// matches the address expression itself (Set dst mem), not a memory load.
6507 instruct leaP8(eRegP dst, indOffset8 mem) %{
6508 match(Set dst mem);
6509
6510 ins_cost(110);
6511 format %{ "LEA $dst,$mem" %}
6512 opcode(0x8D);
6513 ins_encode( OpcP, RegMem(dst,mem));
6514 ins_pipe( ialu_reg_reg_fat );
6515 %}
6516
6517 instruct leaP32(eRegP dst, indOffset32 mem) %{
6518 match(Set dst mem);
6519
6520 ins_cost(110);
6521 format %{ "LEA $dst,$mem" %}
6522 opcode(0x8D);
6708 %}
6709 ins_pipe(fpu_reg_con);
6710 %}
6711
6712 // The instruction usage is guarded by predicate in operand immXD().
// Loads an arbitrary double constant from the per-method constant table.
6713 instruct loadConXD(regXD dst, immXD con) %{
6714 match(Set dst con);
6715 ins_cost(125);
6716 format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
6717 ins_encode %{
6718 __ movdbl($dst$$XMMRegister, $constantaddress($con));
6719 %}
6720 ins_pipe(pipe_slow);
6721 %}
6722
6723 // The instruction usage is guarded by predicate in operand immXD0().
// +0.0 is synthesized with XORPD dst,dst instead of a memory load (cheaper).
6724 instruct loadConXD0(regXD dst, immXD0 src) %{
6725 match(Set dst src);
6726 ins_cost(100);
6727 format %{ "XORPD $dst,$dst\t# double 0.0" %}
6728 ins_encode %{
6729 __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
6730 %}
6731 ins_pipe( pipe_slow );
6732 %}
6733
6734 // Load Stack Slot
// Reloads a spilled int from its stack slot with a plain MOV reg,mem.
6735 instruct loadSSI(eRegI dst, stackSlotI src) %{
6736 match(Set dst src);
6737 ins_cost(125);
6738
6739 format %{ "MOV $dst,$src" %}
6740 opcode(0x8B);
6741 ins_encode( OpcP, RegMem(dst,src));
6742 ins_pipe( ialu_reg_mem );
6743 %}
6744
6745 instruct loadSSL(eRegL dst, stackSlotL src) %{
6746 match(Set dst src);
6747
6748 ins_cost(200);
6749 format %{ "MOV $dst,$src.lo\n\t"
6750 "MOV $dst+4,$src.hi" %}
7012 predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
7013 match(Set mem (StoreL mem src));
7014 effect( KILL cr );
7015 ins_cost(400);
7016 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
7017 "FILD $src\n\t"
7018 "FISTp $mem\t # 64-bit atomic volatile long store" %}
7019 opcode(0x3B);
7020 ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
7021 ins_pipe( fpu_reg_mem );
7022 %}
7023
// Atomic 64-bit volatile long store via SSE2: a single MOVSD gives the
// required single-copy atomicity on 32-bit x86. The leading CMP against the
// store address exists only to trigger an implicit null check; it clobbers
// EFLAGS, hence KILL cr. Source long is already in a stack slot here.
7024 instruct storeLX_volatile(memory mem, stackSlotL src, regXD tmp, eFlagsReg cr) %{
7025 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
7026 match(Set mem (StoreL mem src));
7027 effect( TEMP tmp, KILL cr );
7028 ins_cost(380);
7029 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
7030 "MOVSD $tmp,$src\n\t"
7031 "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
7032 ins_encode %{
7033 __ cmpl(rax, $mem$$Address);
7034 __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
7035 __ movdbl($mem$$Address, $tmp$$XMMRegister);
7036 %}
7037 ins_pipe( pipe_slow );
7038 %}
7039
// Same atomic store, but the long lives in a GPR pair: the two 32-bit halves
// are moved into XMM temps (HIGH_FROM_LOW yields the high half's register),
// merged with PUNPCKLDQ, then stored with one 64-bit MOVSD.
7040 instruct storeLX_reg_volatile(memory mem, eRegL src, regXD tmp2, regXD tmp, eFlagsReg cr) %{
7041 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
7042 match(Set mem (StoreL mem src));
7043 effect( TEMP tmp2 , TEMP tmp, KILL cr );
7044 ins_cost(360);
7045 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
7046 "MOVD $tmp,$src.lo\n\t"
7047 "MOVD $tmp2,$src.hi\n\t"
7048 "PUNPCKLDQ $tmp,$tmp2\n\t"
7049 "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
7050 ins_encode %{
7051 __ cmpl(rax, $mem$$Address);
7052 __ movdl($tmp$$XMMRegister, $src$$Register);
7053 __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
7054 __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
7055 __ movdbl($mem$$Address, $tmp$$XMMRegister);
7056 %}
7057 ins_pipe( pipe_slow );
7058 %}
7059
7060 // Store Pointer; for storing unknown oops and raw pointers
7061 instruct storeP(memory mem, anyRegP src) %{
7062 match(Set mem (StoreP mem src));
7063
7064 ins_cost(125);
7065 format %{ "MOV $mem,$src" %}
7066 opcode(0x89);
7067 ins_encode( OpcP, RegMem( src, mem ) );
7068 ins_pipe( ialu_mem_reg );
7069 %}
7070
7071 // Store Integer Immediate
7072 instruct storeImmI(memory mem, immI src) %{
7073 match(Set mem (StoreI mem src));
7074
7075 ins_cost(150);
7076 format %{ "MOV $mem,$src" %}
7103 ins_pipe( ialu_mem_imm );
7104 %}
7105
7106 // Store Byte Immediate
// C6 /0 MOV m8,imm8 -- stores an 8-bit immediate directly to memory.
7107 instruct storeImmB(memory mem, immI8 src) %{
7108 match(Set mem (StoreB mem src));
7109
7110 ins_cost(150);
7111 format %{ "MOV8 $mem,$src" %}
7112 opcode(0xC6); /* C6 /0 */
7113 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
7114 ins_pipe( ialu_mem_imm );
7115 %}
7116
7117 // Store Aligned Packed Byte XMM register to memory
// The packed-vector stores below mirror the packed loads: one 64-bit MOVQ
// from the low half of an XMM register, differing only in the matched node.
7118 instruct storeA8B(memory mem, regXD src) %{
7119 predicate(UseSSE>=1);
7120 match(Set mem (Store8B mem src));
7121 ins_cost(145);
7122 format %{ "MOVQ $mem,$src\t! packed8B" %}
7123 ins_encode %{
7124 __ movq($mem$$Address, $src$$XMMRegister);
7125 %}
7126 ins_pipe( pipe_slow );
7127 %}
7128
7129 // Store Aligned Packed Char/Short XMM register to memory
7130 instruct storeA4C(memory mem, regXD src) %{
7131 predicate(UseSSE>=1);
7132 match(Set mem (Store4C mem src));
7133 ins_cost(145);
7134 format %{ "MOVQ $mem,$src\t! packed4C" %}
7135 ins_encode %{
7136 __ movq($mem$$Address, $src$$XMMRegister);
7137 %}
7138 ins_pipe( pipe_slow );
7139 %}
7140
7141 // Store Aligned Packed Integer XMM register to memory
7142 instruct storeA2I(memory mem, regXD src) %{
7143 predicate(UseSSE>=1);
7144 match(Set mem (Store2I mem src));
7145 ins_cost(145);
7146 format %{ "MOVQ $mem,$src\t! packed2I" %}
7147 ins_encode %{
7148 __ movq($mem$$Address, $src$$XMMRegister);
7149 %}
7150 ins_pipe( pipe_slow );
7151 %}
7152
7153 // Store CMS card-mark Immediate
// Same C6 /0 byte-store encoding as storeImmB, but matches the StoreCM
// (card-mark) node so the GC barrier store is kept distinct in the graph.
7154 instruct storeImmCM(memory mem, immI8 src) %{
7155 match(Set mem (StoreCM mem src));
7156
7157 ins_cost(150);
7158 format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
7159 opcode(0xC6); /* C6 /0 */
7160 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
7161 ins_pipe( ialu_mem_imm );
7162 %}
7163
7164 // Store Double
7165 instruct storeD( memory mem, regDPR1 src) %{
7166 predicate(UseSSE<=1);
7167 match(Set mem (StoreD mem src));
7168
7169 ins_cost(100);
7175
7176 // Store double does rounding on x86
// x87 path: matching StoreD(RoundDouble ...) lets the FST_D memory store
// itself perform the rounding, so no separate round node is emitted.
7177 instruct storeD_rounded( memory mem, regDPR1 src) %{
7178 predicate(UseSSE<=1);
7179 match(Set mem (StoreD mem (RoundDouble src)));
7180
7181 ins_cost(100);
7182 format %{ "FST_D $mem,$src\t# round" %}
7183 opcode(0xDD); /* DD /2 */
7184 ins_encode( enc_FP_store(mem,src) );
7185 ins_pipe( fpu_mem_reg );
7186 %}
7187
7188 // Store XMM register to memory (double-precision floating points)
7189 // MOVSD instruction
7190 instruct storeXD(memory mem, regXD src) %{
7191 predicate(UseSSE>=2);
7192 match(Set mem (StoreD mem src));
7193 ins_cost(95);
7194 format %{ "MOVSD $mem,$src" %}
7195 ins_encode %{
7196 __ movdbl($mem$$Address, $src$$XMMRegister);
7197 %}
7198 ins_pipe( pipe_slow );
7199 %}
7200
7201 // Store XMM register to memory (single-precision floating point)
7202 // MOVSS instruction
7203 instruct storeX(memory mem, regX src) %{
7204 predicate(UseSSE>=1);
7205 match(Set mem (StoreF mem src));
7206 ins_cost(95);
7207 format %{ "MOVSS $mem,$src" %}
7208 ins_encode %{
7209 __ movflt($mem$$Address, $src$$XMMRegister);
7210 %}
7211 ins_pipe( pipe_slow );
7212 %}
7213
7214 // Store Aligned Packed Single Float XMM register to memory
7215 instruct storeA2F(memory mem, regXD src) %{
7216 predicate(UseSSE>=1);
7217 match(Set mem (Store2F mem src));
7218 ins_cost(145);
7219 format %{ "MOVQ $mem,$src\t! packed2F" %}
7220 ins_encode %{
7221 __ movq($mem$$Address, $src$$XMMRegister);
7222 %}
7223 ins_pipe( pipe_slow );
7224 %}
7225
7226 // Store Float
// x87 float store, only when SSE is fully disabled; src must be on FPU TOS
// (regFPR1), FST_S (D9 /2) stores and rounds to single precision.
7227 instruct storeF( memory mem, regFPR1 src) %{
7228 predicate(UseSSE==0);
7229 match(Set mem (StoreF mem src));
7230
7231 ins_cost(100);
7232 format %{ "FST_S $mem,$src" %}
7233 opcode(0xD9); /* D9 /2 */
7234 ins_encode( enc_FP_store(mem,src) );
7235 ins_pipe( fpu_mem_reg );
7236 %}
7237
7238 // Store Float does rounding on x86
7239 instruct storeF_rounded( memory mem, regFPR1 src) %{
7240 predicate(UseSSE==0);
7241 match(Set mem (StoreF mem (RoundFloat src)));
7242
7912 match(Set dst (CastII dst));
7913 format %{ "#castII of $dst" %}
7914 ins_encode( /*empty encoding*/ );
7915 ins_cost(0);
7916 ins_pipe( empty );
7917 %}
7918
7919
7920 // Load-locked - same as a regular pointer load when used with compare-swap
7921 instruct loadPLocked(eRegP dst, memory mem) %{
7922 match(Set dst (LoadPLocked mem));
7923
7924 ins_cost(125);
7925 format %{ "MOV $dst,$mem\t# Load ptr. locked" %}
7926 opcode(0x8B);
7927 ins_encode( OpcP, RegMem(dst,mem));
7928 ins_pipe( ialu_reg_mem );
7929 %}
7930
7931 // LoadLong-locked - same as a volatile long load when used with compare-swap
// Pre-SSE2 path: FILD/FISTP gives an atomic 64-bit load into a stack slot.
7932 instruct loadLLocked(stackSlotL dst, memory mem) %{
7933 predicate(UseSSE<=1);
7934 match(Set dst (LoadLLocked mem));
7935
7936 ins_cost(200);
7937 format %{ "FILD $mem\t# Atomic volatile long load\n\t"
7938 "FISTp $dst" %}
7939 ins_encode(enc_loadL_volatile(mem,dst));
7940 ins_pipe( fpu_reg_mem );
7941 %}
7942
// SSE2 path: one 64-bit MOVSD load (atomic), bounced through an XMM temp
// into the destination stack slot.
7943 instruct loadLX_Locked(stackSlotL dst, memory mem, regXD tmp) %{
7944 predicate(UseSSE>=2);
7945 match(Set dst (LoadLLocked mem));
7946 effect(TEMP tmp);
7947 ins_cost(180);
7948 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
7949 "MOVSD $dst,$tmp" %}
7950 ins_encode %{
7951 __ movdbl($tmp$$XMMRegister, $mem$$Address);
7952 __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
7953 %}
7954 ins_pipe( pipe_slow );
7955 %}
7956
// SSE2 path into a GPR pair: load atomically into XMM, MOVD out the low
// word, shift right 32 with PSRLQ, MOVD out the high word (HIGH_FROM_LOW
// names the high half of the long register pair).
7957 instruct loadLX_reg_Locked(eRegL dst, memory mem, regXD tmp) %{
7958 predicate(UseSSE>=2);
7959 match(Set dst (LoadLLocked mem));
7960 effect(TEMP tmp);
7961 ins_cost(160);
7962 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
7963 "MOVD $dst.lo,$tmp\n\t"
7964 "PSRLQ $tmp,32\n\t"
7965 "MOVD $dst.hi,$tmp" %}
7966 ins_encode %{
7967 __ movdbl($tmp$$XMMRegister, $mem$$Address);
7968 __ movdl($dst$$Register, $tmp$$XMMRegister);
7969 __ psrlq($tmp$$XMMRegister, 32);
7970 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
7971 %}
7972 ins_pipe( pipe_slow );
7973 %}
7974
7975 // Conditional-store of the updated heap-top.
7976 // Used during allocation of the shared heap.
7977 // Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel.
// 0F B1 is CMPXCHG r/m32,r32; lock_prefix makes it atomic. oldval is pinned
// to EAX because CMPXCHG implicitly compares/loads through EAX.
7978 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
7979 match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
7980 // EAX is killed if there is contention, but then it's also unused.
7981 // In the common case of no contention, EAX holds the new oop address.
7982 format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
7983 ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
7984 ins_pipe( pipe_cmpxchg );
7985 %}
7986
7987 // Conditional-store of an int value.
7988 // ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel.
7989 instruct storeIConditional( memory mem, eAXRegI oldval, eRegI newval, eFlagsReg cr ) %{
7990 match(Set cr (StoreIConditional mem (Binary oldval newval)));
7991 effect(KILL oldval);
9613 OpcS, OpcP, PopFPU,
9614 CmpF_Result(dst));
9615 ins_pipe( pipe_slow );
9616 %}
9617
9618 // Compare into -1,0,1
// x87 three-way double compare: pushes src1, compares against src2
// (D8 /3 = FCOMP family), then CmpF_Result materializes -1/0/1 in $dst.
// Clobbers EAX (FNSTSW-style flag extraction) and EFLAGS.
9619 instruct cmpD_reg(eRegI dst, regD src1, regD src2, eAXRegI rax, eFlagsReg cr) %{
9620 predicate(UseSSE<=1);
9621 match(Set dst (CmpD3 src1 src2));
9622 effect(KILL cr, KILL rax);
9623 ins_cost(300);
9624 format %{ "FCMPD $dst,$src1,$src2" %}
9625 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9626 ins_encode( Push_Reg_D(src1),
9627 OpcP, RegOpc(src2),
9628 CmpF_Result(dst));
9629 ins_pipe( pipe_slow );
9630 %}
9631
9632 // float compare and set condition codes in EFLAGS by XMM regs
// UCOMISD sets PF on unordered (NaN); emit_cmpfp_fixup rewrites the flags
// so NaN compares as "less than" (sets CF), matching Java semantics.
9633 instruct cmpXD_cc(eFlagsRegU cr, regXD src1, regXD src2) %{
9634 predicate(UseSSE>=2);
9635 match(Set cr (CmpD src1 src2));
9636 ins_cost(145);
9637 format %{ "UCOMISD $src1,$src2\n\t"
9638 "JNP,s exit\n\t"
9639 "PUSHF\t# saw NaN, set CF\n\t"
9640 "AND [rsp], #0xffffff2b\n\t"
9641 "POPF\n"
9642 "exit:" %}
9643 ins_encode %{
9644 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9645 emit_cmpfp_fixup(_masm);
9646 %}
9647 ins_pipe( pipe_slow );
9648 %}
9649
// Cheaper variant for consumers that only read CF/ZF (eFlagsRegUCF):
// no NaN fixup sequence is needed.
9650 instruct cmpXD_ccCF(eFlagsRegUCF cr, regXD src1, regXD src2) %{
9651 predicate(UseSSE>=2);
9652 match(Set cr (CmpD src1 src2));
9653 ins_cost(100);
9654 format %{ "UCOMISD $src1,$src2" %}
9655 ins_encode %{
9656 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9657 %}
9658 ins_pipe( pipe_slow );
9659 %}
9660
9661 // float compare and set condition codes in EFLAGS by XMM regs
// Memory-operand form of cmpXD_cc: folds the LoadD into the compare.
9662 instruct cmpXD_ccmem(eFlagsRegU cr, regXD src1, memory src2) %{
9663 predicate(UseSSE>=2);
9664 match(Set cr (CmpD src1 (LoadD src2)));
9665 ins_cost(145);
9666 format %{ "UCOMISD $src1,$src2\n\t"
9667 "JNP,s exit\n\t"
9668 "PUSHF\t# saw NaN, set CF\n\t"
9669 "AND [rsp], #0xffffff2b\n\t"
9670 "POPF\n"
9671 "exit:" %}
9672 ins_encode %{
9673 __ ucomisd($src1$$XMMRegister, $src2$$Address);
9674 emit_cmpfp_fixup(_masm);
9675 %}
9676 ins_pipe( pipe_slow );
9677 %}
9678
9679 instruct cmpXD_ccmemCF(eFlagsRegUCF cr, regXD src1, memory src2) %{
9680 predicate(UseSSE>=2);
9681 match(Set cr (CmpD src1 (LoadD src2)));
9682 ins_cost(100);
9683 format %{ "UCOMISD $src1,$src2" %}
9684 ins_encode %{
9685 __ ucomisd($src1$$XMMRegister, $src2$$Address);
9686 %}
9687 ins_pipe( pipe_slow );
9688 %}
9689
9690 // Compare into -1,0,1 in XMM
// Three-way compare result in a GPR; emit_cmpfp3 emits the MOV/-1, JP/JB,
// SETNE, MOVZB sequence shown in the format string. EFLAGS clobbered.
9691 instruct cmpXD_reg(xRegI dst, regXD src1, regXD src2, eFlagsReg cr) %{
9692 predicate(UseSSE>=2);
9693 match(Set dst (CmpD3 src1 src2));
9694 effect(KILL cr);
9695 ins_cost(255);
9696 format %{ "UCOMISD $src1, $src2\n\t"
9697 "MOV $dst, #-1\n\t"
9698 "JP,s done\n\t"
9699 "JB,s done\n\t"
9700 "SETNE $dst\n\t"
9701 "MOVZB $dst, $dst\n"
9702 "done:" %}
9703 ins_encode %{
9704 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9705 emit_cmpfp3(_masm, $dst$$Register);
9706 %}
9707 ins_pipe( pipe_slow );
9708 %}
9709
9710 // Compare into -1,0,1 in XMM and memory
9711 instruct cmpXD_regmem(xRegI dst, regXD src1, memory src2, eFlagsReg cr) %{
9712 predicate(UseSSE>=2);
9713 match(Set dst (CmpD3 src1 (LoadD src2)));
9714 effect(KILL cr);
9715 ins_cost(275);
9716 format %{ "UCOMISD $src1, $src2\n\t"
9717 "MOV $dst, #-1\n\t"
9718 "JP,s done\n\t"
9719 "JB,s done\n\t"
9720 "SETNE $dst\n\t"
9721 "MOVZB $dst, $dst\n"
9722 "done:" %}
9723 ins_encode %{
9724 __ ucomisd($src1$$XMMRegister, $src2$$Address);
9725 emit_cmpfp3(_masm, $dst$$Register);
9726 %}
9727 ins_pipe( pipe_slow );
9728 %}
9729
9730
// x87 double subtract: FLD src then FSUBP (DE /5) into dst, popping the stack.
9731 instruct subD_reg(regD dst, regD src) %{
9732 predicate (UseSSE <=1);
9733 match(Set dst (SubD dst src));
9734
9735 format %{ "FLD $src\n\t"
9736 "DSUBp $dst,ST" %}
9737 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
9738 ins_cost(150);
9739 ins_encode( Push_Reg_D(src),
9740 OpcP, RegOpc(dst) );
9741 ins_pipe( fpu_reg_reg );
9742 %}
9743
9744 instruct subD_reg_round(stackSlotD dst, regD src1, regD src2) %{
9745 predicate (UseSSE <=1);
9746 match(Set dst (RoundDouble (SubD src1 src2)));
9765 "DSUBp $dst,ST" %}
9766 opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD DD /0 */
9767 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9768 OpcP, RegOpc(dst) );
9769 ins_pipe( fpu_reg_mem );
9770 %}
9771
// x87 double abs: FABS (D9 E1) operates in place on the FPU top-of-stack,
// so both operands are pinned to regDPR1.
9772 instruct absD_reg(regDPR1 dst, regDPR1 src) %{
9773 predicate (UseSSE<=1);
9774 match(Set dst (AbsD src));
9775 ins_cost(100);
9776 format %{ "FABS" %}
9777 opcode(0xE1, 0xD9);
9778 ins_encode( OpcS, OpcP );
9779 ins_pipe( fpu_reg_reg );
9780 %}
9781
// SSE2 abs: ANDPD with the statically-initialized 0x7FFF... sign-mask pool
// clears the sign bit. Pool address is 16-byte aligned at VM startup.
9782 instruct absXD_reg( regXD dst ) %{
9783 predicate(UseSSE>=2);
9784 match(Set dst (AbsD dst));
9785 ins_cost(150);
9786 format %{ "ANDPD $dst,[0x7FFFFFFFFFFFFFFF]\t# ABS D by sign masking" %}
9787 ins_encode %{
9788 __ andpd($dst$$XMMRegister,
9789 ExternalAddress((address)double_signmask_pool));
9790 %}
9791 ins_pipe( pipe_slow );
9792 %}
9793
// x87 double negate: FCHS (D9 E0) flips the sign of the FPU top-of-stack.
9794 instruct negD_reg(regDPR1 dst, regDPR1 src) %{
9795 predicate(UseSSE<=1);
9796 match(Set dst (NegD src));
9797 ins_cost(100);
9798 format %{ "FCHS" %}
9799 opcode(0xE0, 0xD9);
9800 ins_encode( OpcS, OpcP );
9801 ins_pipe( fpu_reg_reg );
9802 %}
9803
// SSE2 negate: XORPD with the 0x8000... sign-flip pool toggles the sign bit.
9804 instruct negXD_reg( regXD dst ) %{
9805 predicate(UseSSE>=2);
9806 match(Set dst (NegD dst));
9807 ins_cost(150);
9808 format %{ "XORPD $dst,[0x8000000000000000]\t# CHS D by sign flipping" %}
9809 ins_encode %{
9810 __ xorpd($dst$$XMMRegister,
9811 ExternalAddress((address)double_signflip_pool));
9812 %}
9813 ins_pipe( pipe_slow );
9814 %}
9815
// x87 double add: FLD src, then FADDP (DE /0) into dst.
9816 instruct addD_reg(regD dst, regD src) %{
9817 predicate(UseSSE<=1);
9818 match(Set dst (AddD dst src));
9819 format %{ "FLD $src\n\t"
9820 "DADD $dst,ST" %}
9821 size(4);
9822 ins_cost(150);
9823 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
9824 ins_encode( Push_Reg_D(src),
9825 OpcP, RegOpc(dst) );
9826 ins_pipe( fpu_reg_reg );
9827 %}
// Add a double constant then round to a stack slot. The predicate excludes
// con == 0.0 and con == 1.0, which are handled by other (cheaper) rules;
// the constant is loaded from the per-method constant table.
9901 instruct addD_reg_imm_round(stackSlotD dst, regD src, immD con) %{
9902 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
9903 match(Set dst (RoundDouble (AddD src con)));
9904 ins_cost(200);
9905 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
9906 "DADD ST,$src\n\t"
9907 "FSTP_D $dst\t# D-round" %}
9908 ins_encode %{
9909 __ fld_d($constantaddress($con));
9910 __ fadd($src$$reg);
9911 __ fstp_d(Address(rsp, $dst$$disp));
9912 %}
9913 ins_pipe(fpu_mem_reg_con);
9914 %}
9915
9916 // Add two double precision floating point values in xmm
// The SSE2 scalar double arithmetic rules below come in three flavors each:
// reg-reg, reg-constant (loaded from the constant table), and reg-mem
// (folding a LoadD into the operation).
9917 instruct addXD_reg(regXD dst, regXD src) %{
9918 predicate(UseSSE>=2);
9919 match(Set dst (AddD dst src));
9920 format %{ "ADDSD $dst,$src" %}
9921 ins_encode %{
9922 __ addsd($dst$$XMMRegister, $src$$XMMRegister);
9923 %}
9924 ins_pipe( pipe_slow );
9925 %}
9926
9927 instruct addXD_imm(regXD dst, immXD con) %{
9928 predicate(UseSSE>=2);
9929 match(Set dst (AddD dst con));
9930 format %{ "ADDSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
9931 ins_encode %{
9932 __ addsd($dst$$XMMRegister, $constantaddress($con));
9933 %}
9934 ins_pipe(pipe_slow);
9935 %}
9936
9937 instruct addXD_mem(regXD dst, memory mem) %{
9938 predicate(UseSSE>=2);
9939 match(Set dst (AddD dst (LoadD mem)));
9940 format %{ "ADDSD $dst,$mem" %}
9941 ins_encode %{
9942 __ addsd($dst$$XMMRegister, $mem$$Address);
9943 %}
9944 ins_pipe( pipe_slow );
9945 %}
9946
9947 // Sub two double precision floating point values in xmm
9948 instruct subXD_reg(regXD dst, regXD src) %{
9949 predicate(UseSSE>=2);
9950 match(Set dst (SubD dst src));
9951 ins_cost(150);
9952 format %{ "SUBSD $dst,$src" %}
9953 ins_encode %{
9954 __ subsd($dst$$XMMRegister, $src$$XMMRegister);
9955 %}
9956 ins_pipe( pipe_slow );
9957 %}
9958
9959 instruct subXD_imm(regXD dst, immXD con) %{
9960 predicate(UseSSE>=2);
9961 match(Set dst (SubD dst con));
9962 ins_cost(150);
9963 format %{ "SUBSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
9964 ins_encode %{
9965 __ subsd($dst$$XMMRegister, $constantaddress($con));
9966 %}
9967 ins_pipe(pipe_slow);
9968 %}
9969
9970 instruct subXD_mem(regXD dst, memory mem) %{
9971 predicate(UseSSE>=2);
9972 match(Set dst (SubD dst (LoadD mem)));
9973 ins_cost(150);
9974 format %{ "SUBSD $dst,$mem" %}
9975 ins_encode %{
9976 __ subsd($dst$$XMMRegister, $mem$$Address);
9977 %}
9978 ins_pipe( pipe_slow );
9979 %}
9980
9981 // Mul two double precision floating point values in xmm
9982 instruct mulXD_reg(regXD dst, regXD src) %{
9983 predicate(UseSSE>=2);
9984 match(Set dst (MulD dst src));
9985 format %{ "MULSD $dst,$src" %}
9986 ins_encode %{
9987 __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
9988 %}
9989 ins_pipe( pipe_slow );
9990 %}
9991
9992 instruct mulXD_imm(regXD dst, immXD con) %{
9993 predicate(UseSSE>=2);
9994 match(Set dst (MulD dst con));
9995 format %{ "MULSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
9996 ins_encode %{
9997 __ mulsd($dst$$XMMRegister, $constantaddress($con));
9998 %}
9999 ins_pipe(pipe_slow);
10000 %}
10001
10002 instruct mulXD_mem(regXD dst, memory mem) %{
10003 predicate(UseSSE>=2);
10004 match(Set dst (MulD dst (LoadD mem)));
10005 format %{ "MULSD $dst,$mem" %}
10006 ins_encode %{
10007 __ mulsd($dst$$XMMRegister, $mem$$Address);
10008 %}
10009 ins_pipe( pipe_slow );
10010 %}
10011
10012 // Div two double precision floating point values in xmm
// Note: the opcode() bytes here are unused by the %{ %} encoding, which
// calls the macro-assembler directly; left for reference only.
10013 instruct divXD_reg(regXD dst, regXD src) %{
10014 predicate(UseSSE>=2);
10015 match(Set dst (DivD dst src));
10016 format %{ "DIVSD $dst,$src" %}
10017 opcode(0xF2, 0x0F, 0x5E);
10018 ins_encode %{
10019 __ divsd($dst$$XMMRegister, $src$$XMMRegister);
10020 %}
10021 ins_pipe( pipe_slow );
10022 %}
10023
10024 instruct divXD_imm(regXD dst, immXD con) %{
10025 predicate(UseSSE>=2);
10026 match(Set dst (DivD dst con));
10027 format %{ "DIVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
10028 ins_encode %{
10029 __ divsd($dst$$XMMRegister, $constantaddress($con));
10030 %}
10031 ins_pipe(pipe_slow);
10032 %}
10033
10034 instruct divXD_mem(regXD dst, memory mem) %{
10035 predicate(UseSSE>=2);
10036 match(Set dst (DivD dst (LoadD mem)));
10037 format %{ "DIVSD $dst,$mem" %}
10038 ins_encode %{
10039 __ divsd($dst$$XMMRegister, $mem$$Address);
10040 %}
10041 ins_pipe( pipe_slow );
10042 %}
10043
10044
// x87 double multiply: FLD src then FMULP (DE /1) into dst.
10045 instruct mulD_reg(regD dst, regD src) %{
10046 predicate(UseSSE<=1);
10047 match(Set dst (MulD dst src));
10048 format %{ "FLD $src\n\t"
10049 "DMULp $dst,ST" %}
10050 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
10051 ins_cost(150);
10052 ins_encode( Push_Reg_D(src),
10053 OpcP, RegOpc(dst) );
10054 ins_pipe( fpu_reg_reg );
10055 %}
10056
10057 // Strict FP instruction biases argument before multiply then
10058 // biases result to avoid double rounding of subnormals.
10059 //
10060 // scale arg1 by multiplying arg1 by 2^(-15360)
10652 OpcS, OpcP, PopFPU,
10653 CmpF_Result(dst));
10654 ins_pipe( pipe_slow );
10655 %}
10656
10657 // Compare into -1,0,1
// x87 three-way float compare, mirroring cmpD_reg: Push_Reg_D is the
// shared x87 push helper used by both float and double compare rules here.
// Clobbers EAX and EFLAGS for the flag-extraction sequence.
10658 instruct cmpF_reg(eRegI dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{
10659 predicate(UseSSE == 0);
10660 match(Set dst (CmpF3 src1 src2));
10661 effect(KILL cr, KILL rax);
10662 ins_cost(300);
10663 format %{ "FCMPF $dst,$src1,$src2" %}
10664 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10665 ins_encode( Push_Reg_D(src1),
10666 OpcP, RegOpc(src2),
10667 CmpF_Result(dst));
10668 ins_pipe( pipe_slow );
10669 %}
10670
10671 // float compare and set condition codes in EFLAGS by XMM regs
// Single-precision twins of the cmpXD_* rules: UCOMISS plus the same
// emit_cmpfp_fixup NaN handling (PF set on unordered -> force CF).
10672 instruct cmpX_cc(eFlagsRegU cr, regX src1, regX src2) %{
10673 predicate(UseSSE>=1);
10674 match(Set cr (CmpF src1 src2));
10675 ins_cost(145);
10676 format %{ "UCOMISS $src1,$src2\n\t"
10677 "JNP,s exit\n\t"
10678 "PUSHF\t# saw NaN, set CF\n\t"
10679 "AND [rsp], #0xffffff2b\n\t"
10680 "POPF\n"
10681 "exit:" %}
10682 ins_encode %{
10683 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10684 emit_cmpfp_fixup(_masm);
10685 %}
10686 ins_pipe( pipe_slow );
10687 %}
10688
// CF-only consumers (eFlagsRegUCF) skip the NaN fixup sequence.
10689 instruct cmpX_ccCF(eFlagsRegUCF cr, regX src1, regX src2) %{
10690 predicate(UseSSE>=1);
10691 match(Set cr (CmpF src1 src2));
10692 ins_cost(100);
10693 format %{ "UCOMISS $src1,$src2" %}
10694 ins_encode %{
10695 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10696 %}
10697 ins_pipe( pipe_slow );
10698 %}
10699
10700 // float compare and set condition codes in EFLAGS by XMM regs
// Memory-operand form: folds the LoadF into the compare.
10701 instruct cmpX_ccmem(eFlagsRegU cr, regX src1, memory src2) %{
10702 predicate(UseSSE>=1);
10703 match(Set cr (CmpF src1 (LoadF src2)));
10704 ins_cost(165);
10705 format %{ "UCOMISS $src1,$src2\n\t"
10706 "JNP,s exit\n\t"
10707 "PUSHF\t# saw NaN, set CF\n\t"
10708 "AND [rsp], #0xffffff2b\n\t"
10709 "POPF\n"
10710 "exit:" %}
10711 ins_encode %{
10712 __ ucomiss($src1$$XMMRegister, $src2$$Address);
10713 emit_cmpfp_fixup(_masm);
10714 %}
10715 ins_pipe( pipe_slow );
10716 %}
10717
10718 instruct cmpX_ccmemCF(eFlagsRegUCF cr, regX src1, memory src2) %{
10719 predicate(UseSSE>=1);
10720 match(Set cr (CmpF src1 (LoadF src2)));
10721 ins_cost(100);
10722 format %{ "UCOMISS $src1,$src2" %}
10723 ins_encode %{
10724 __ ucomiss($src1$$XMMRegister, $src2$$Address);
10725 %}
10726 ins_pipe( pipe_slow );
10727 %}
10728
10729 // Compare into -1,0,1 in XMM
// Three-way result in a GPR via emit_cmpfp3 (MOV/-1, JP/JB, SETNE, MOVZB).
10730 instruct cmpX_reg(xRegI dst, regX src1, regX src2, eFlagsReg cr) %{
10731 predicate(UseSSE>=1);
10732 match(Set dst (CmpF3 src1 src2));
10733 effect(KILL cr);
10734 ins_cost(255);
10735 format %{ "UCOMISS $src1, $src2\n\t"
10736 "MOV $dst, #-1\n\t"
10737 "JP,s done\n\t"
10738 "JB,s done\n\t"
10739 "SETNE $dst\n\t"
10740 "MOVZB $dst, $dst\n"
10741 "done:" %}
10742 ins_encode %{
10743 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10744 emit_cmpfp3(_masm, $dst$$Register);
10745 %}
10746 ins_pipe( pipe_slow );
10747 %}
10748
10749 // Compare into -1,0,1 in XMM and memory
10750 instruct cmpX_regmem(xRegI dst, regX src1, memory src2, eFlagsReg cr) %{
10751 predicate(UseSSE>=1);
10752 match(Set dst (CmpF3 src1 (LoadF src2)));
10753 effect(KILL cr);
10754 ins_cost(275);
10755 format %{ "UCOMISS $src1, $src2\n\t"
10756 "MOV $dst, #-1\n\t"
10757 "JP,s done\n\t"
10758 "JB,s done\n\t"
10759 "SETNE $dst\n\t"
10760 "MOVZB $dst, $dst\n"
10761 "done:" %}
10762 ins_encode %{
10763 __ ucomiss($src1$$XMMRegister, $src2$$Address);
10764 emit_cmpfp3(_masm, $dst$$Register);
10765 %}
10766 ins_pipe( pipe_slow );
10767 %}
10768
10769 // Spill to obtain 24-bit precision
// In 24-bit FPU mode the result is spilled to a single-precision stack slot
// so it is rounded to float precision, matching strict Java float semantics.
10770 instruct subF24_reg(stackSlotF dst, regF src1, regF src2) %{
10771 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10772 match(Set dst (SubF src1 src2));
10773
10774 format %{ "FSUB $dst,$src1 - $src2" %}
10775 opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10776 ins_encode( Push_Reg_F(src1),
10777 OpcReg_F(src2),
10778 Pop_Mem_F(dst) );
10779 ins_pipe( fpu_mem_reg_reg );
10780 %}
10781 //
10782 // This instruction does not round to 24-bits
10783 instruct subF_reg(regF dst, regF src) %{
10784 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10785 match(Set dst (SubF dst src));
10805 %}
10806 //
10807 // This instruction does not round to 24-bits
// Register-to-register x87 float add (FLD + FADDP), usable only when the
// compiler is not in 24-bit rounding mode.
10808 instruct addF_reg(regF dst, regF src) %{
10809 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10810 match(Set dst (AddF dst src));
10811
10812 format %{ "FLD $src\n\t"
10813 "FADDp $dst,ST" %}
10814 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10815 ins_encode( Push_Reg_F(src),
10816 OpcP, RegOpc(dst) );
10817 ins_pipe( fpu_reg_reg );
10818 %}
10819
10820 // Add two single precision floating point values in xmm
// SSE scalar float arithmetic: like the double (XD) family, each operation
// has reg-reg, reg-constant-table, and reg-mem (folded LoadF) variants.
10821 instruct addX_reg(regX dst, regX src) %{
10822 predicate(UseSSE>=1);
10823 match(Set dst (AddF dst src));
10824 format %{ "ADDSS $dst,$src" %}
10825 ins_encode %{
10826 __ addss($dst$$XMMRegister, $src$$XMMRegister);
10827 %}
10828 ins_pipe( pipe_slow );
10829 %}
10830
10831 instruct addX_imm(regX dst, immXF con) %{
10832 predicate(UseSSE>=1);
10833 match(Set dst (AddF dst con));
10834 format %{ "ADDSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
10835 ins_encode %{
10836 __ addss($dst$$XMMRegister, $constantaddress($con));
10837 %}
10838 ins_pipe(pipe_slow);
10839 %}
10840
10841 instruct addX_mem(regX dst, memory mem) %{
10842 predicate(UseSSE>=1);
10843 match(Set dst (AddF dst (LoadF mem)));
10844 format %{ "ADDSS $dst,$mem" %}
10845 ins_encode %{
10846 __ addss($dst$$XMMRegister, $mem$$Address);
10847 %}
10848 ins_pipe( pipe_slow );
10849 %}
10850
10851 // Subtract two single precision floating point values in xmm
10852 instruct subX_reg(regX dst, regX src) %{
10853 predicate(UseSSE>=1);
10854 match(Set dst (SubF dst src));
10855 ins_cost(150);
10856 format %{ "SUBSS $dst,$src" %}
10857 ins_encode %{
10858 __ subss($dst$$XMMRegister, $src$$XMMRegister);
10859 %}
10860 ins_pipe( pipe_slow );
10861 %}
10862
10863 instruct subX_imm(regX dst, immXF con) %{
10864 predicate(UseSSE>=1);
10865 match(Set dst (SubF dst con));
10866 ins_cost(150);
10867 format %{ "SUBSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
10868 ins_encode %{
10869 __ subss($dst$$XMMRegister, $constantaddress($con));
10870 %}
10871 ins_pipe(pipe_slow);
10872 %}
10873
10874 instruct subX_mem(regX dst, memory mem) %{
10875 predicate(UseSSE>=1);
10876 match(Set dst (SubF dst (LoadF mem)));
10877 ins_cost(150);
10878 format %{ "SUBSS $dst,$mem" %}
10879 ins_encode %{
10880 __ subss($dst$$XMMRegister, $mem$$Address);
10881 %}
10882 ins_pipe( pipe_slow );
10883 %}
10884
10885 // Multiply two single precision floating point values in xmm
10886 instruct mulX_reg(regX dst, regX src) %{
10887 predicate(UseSSE>=1);
10888 match(Set dst (MulF dst src));
10889 format %{ "MULSS $dst,$src" %}
10890 ins_encode %{
10891 __ mulss($dst$$XMMRegister, $src$$XMMRegister);
10892 %}
10893 ins_pipe( pipe_slow );
10894 %}
10895
10896 instruct mulX_imm(regX dst, immXF con) %{
10897 predicate(UseSSE>=1);
10898 match(Set dst (MulF dst con));
10899 format %{ "MULSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
10900 ins_encode %{
10901 __ mulss($dst$$XMMRegister, $constantaddress($con));
10902 %}
10903 ins_pipe(pipe_slow);
10904 %}
10905
10906 instruct mulX_mem(regX dst, memory mem) %{
10907 predicate(UseSSE>=1);
10908 match(Set dst (MulF dst (LoadF mem)));
10909 format %{ "MULSS $dst,$mem" %}
10910 ins_encode %{
10911 __ mulss($dst$$XMMRegister, $mem$$Address);
10912 %}
10913 ins_pipe( pipe_slow );
10914 %}
10915
10916 // Divide two single precision floating point values in xmm
10917 instruct divX_reg(regX dst, regX src) %{
10918 predicate(UseSSE>=1);
10919 match(Set dst (DivF dst src));
10920 format %{ "DIVSS $dst,$src" %}
10921 ins_encode %{
10922 __ divss($dst$$XMMRegister, $src$$XMMRegister);
10923 %}
10924 ins_pipe( pipe_slow );
10925 %}
10926
10927 instruct divX_imm(regX dst, immXF con) %{
10928 predicate(UseSSE>=1);
10929 match(Set dst (DivF dst con));
10930 format %{ "DIVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
10931 ins_encode %{
10932 __ divss($dst$$XMMRegister, $constantaddress($con));
10933 %}
10934 ins_pipe(pipe_slow);
10935 %}
10936
10937 instruct divX_mem(regX dst, memory mem) %{
10938 predicate(UseSSE>=1);
10939 match(Set dst (DivF dst (LoadF mem)));
10940 format %{ "DIVSS $dst,$mem" %}
10941 ins_encode %{
10942 __ divss($dst$$XMMRegister, $mem$$Address);
10943 %}
10944 ins_pipe( pipe_slow );
10945 %}
10946
10947 // Get the square root of a single precision floating point values in xmm
// Matches the ConvD2F(SqrtD(ConvF2D ...)) sandwich that Java's double-only
// Math.sqrt produces for a float operand, so a single SQRTSS can be used
// instead of widening to double and back.
10948 instruct sqrtX_reg(regX dst, regX src) %{
10949 predicate(UseSSE>=1);
10950 match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
10951 ins_cost(150);
10952 format %{ "SQRTSS $dst,$src" %}
10953 ins_encode %{
10954 __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
10955 %}
10956 ins_pipe( pipe_slow );
10957 %}
10958 
// Same as sqrtX_reg but with the float operand loaded straight from memory.
10959 instruct sqrtX_mem(regX dst, memory mem) %{
10960 predicate(UseSSE>=1);
10961 match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF mem)))));
10962 ins_cost(150);
10963 format %{ "SQRTSS $dst,$mem" %}
10964 ins_encode %{
10965 __ sqrtss($dst$$XMMRegister, $mem$$Address);
10966 %}
10967 ins_pipe( pipe_slow );
10968 %}
10969 
10970 // Get the square root of a double precision floating point values in xmm
10971 instruct sqrtXD_reg(regXD dst, regXD src) %{
10972 predicate(UseSSE>=2);
10973 match(Set dst (SqrtD src));
10974 ins_cost(150);
10975 format %{ "SQRTSD $dst,$src" %}
10976 ins_encode %{
10977 __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
10978 %}
10979 ins_pipe( pipe_slow );
10980 %}
10981 
// Same as sqrtXD_reg but with the double operand loaded straight from memory.
10982 instruct sqrtXD_mem(regXD dst, memory mem) %{
10983 predicate(UseSSE>=2);
10984 match(Set dst (SqrtD (LoadD mem)));
10985 ins_cost(150);
10986 format %{ "SQRTSD $dst,$mem" %}
10987 ins_encode %{
10988 __ sqrtsd($dst$$XMMRegister, $mem$$Address);
10989 %}
10990 ins_pipe( pipe_slow );
10991 %}
10992
// Absolute value of a float on the x87 stack (no SSE): single FABS opcode (D9 E1).
10993 instruct absF_reg(regFPR1 dst, regFPR1 src) %{
10994 predicate(UseSSE==0);
10995 match(Set dst (AbsF src));
10996 ins_cost(100);
10997 format %{ "FABS" %}
10998 opcode(0xE1, 0xD9);
10999 ins_encode( OpcS, OpcP );
11000 ins_pipe( fpu_reg_reg );
11001 %}
11002 
// Absolute value of a float in xmm: clear the sign bit by ANDing with the
// 0x7FFFFFFF... mask kept in the statically initialized float_signmask_pool.
11003 instruct absX_reg(regX dst ) %{
11004 predicate(UseSSE>=1);
11005 match(Set dst (AbsF dst));
11006 ins_cost(150);
11007 format %{ "ANDPS $dst,[0x7FFFFFFF]\t# ABS F by sign masking" %}
11008 ins_encode %{
11009 __ andps($dst$$XMMRegister,
11010 ExternalAddress((address)float_signmask_pool));
11011 %}
11012 ins_pipe( pipe_slow );
11013 %}
11014 
// Negate a float on the x87 stack (no SSE): single FCHS opcode (D9 E0).
11015 instruct negF_reg(regFPR1 dst, regFPR1 src) %{
11016 predicate(UseSSE==0);
11017 match(Set dst (NegF src));
11018 ins_cost(100);
11019 format %{ "FCHS" %}
11020 opcode(0xE0, 0xD9);
11021 ins_encode( OpcS, OpcP );
11022 ins_pipe( fpu_reg_reg );
11023 %}
11024 
// Negate a float in xmm: flip the sign bit by XORing with the
// 0x80000000... mask kept in the statically initialized float_signflip_pool.
11025 instruct negX_reg( regX dst ) %{
11026 predicate(UseSSE>=1);
11027 match(Set dst (NegF dst));
11028 ins_cost(150);
11029 format %{ "XORPS $dst,[0x80000000]\t# CHS F by sign flipping" %}
11030 ins_encode %{
11031 __ xorps($dst$$XMMRegister,
11032 ExternalAddress((address)float_signflip_pool));
11033 %}
11034 ins_pipe( pipe_slow );
11035 %}
11036
11037 // Cisc-alternate to addF_reg
11038 // Spill to obtain 24-bit precision
// x87 add with one operand loaded from memory; the FSTP_S to a stack slot
// forces the result to be rounded to 24-bit (float) precision, which the
// select_24_bit_instr() predicate requires.
11039 instruct addF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{
11040 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11041 match(Set dst (AddF src1 (LoadF src2)));
11042 
11043 format %{ "FLD $src2\n\t"
11044 "FADD ST,$src1\n\t"
11045 "FSTP_S $dst" %}
11046 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
11047 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11048 OpcReg_F(src1),
11049 Pop_Mem_F(dst) );
11050 ins_pipe( fpu_mem_reg_mem );
11051 %}
11052 //
11053 // Cisc-alternate to addF_reg
11419
11420 // Force rounding to 24-bit precision and 6-bit exponent
// x87-only D2F: round by storing through a float-sized stack slot
// (expands to the shared roundFloat_mem_reg rule).
11421 instruct convD2F_reg(stackSlotF dst, regD src) %{
11422 predicate(UseSSE==0);
11423 match(Set dst (ConvD2F src));
11424 format %{ "FST_S $dst,$src\t# F-round" %}
11425 expand %{
11426 roundFloat_mem_reg(dst,src);
11427 %}
11428 %}
11429 
11430 // Force rounding to 24-bit precision and 6-bit exponent
// UseSSE==1 D2F: source is on the x87 stack but the result must land in xmm,
// so bounce through a 4-byte scratch slot on the C stack.
11431 instruct convD2X_reg(regX dst, regD src, eFlagsReg cr) %{
11432 predicate(UseSSE==1);
11433 match(Set dst (ConvD2F src));
11434 effect( KILL cr );
11435 format %{ "SUB ESP,4\n\t"
11436 "FST_S [ESP],$src\t# F-round\n\t"
11437 "MOVSS $dst,[ESP]\n\t"
11438 "ADD ESP,4" %}
11439 ins_encode %{
11440 __ subptr(rsp, 4);
// If src is not already at the top of the x87 stack (FPR1), load it first
// and pop it after the store; otherwise store top-of-stack in place.
11441 if ($src$$reg != FPR1L_enc) {
11442 __ fld_s($src$$reg-1);
11443 __ fstp_s(Address(rsp, 0));
11444 } else {
11445 __ fst_s(Address(rsp, 0));
11446 }
11447 __ movflt($dst$$XMMRegister, Address(rsp, 0));
11448 __ addptr(rsp, 4);
11449 %}
11450 ins_pipe( pipe_slow );
11451 %}
11452 
11453 // Force rounding double precision to single precision
// Full-SSE2 D2F: a single CVTSD2SS does the rounding in xmm registers.
11454 instruct convXD2X_reg(regX dst, regXD src) %{
11455 predicate(UseSSE>=2);
11456 match(Set dst (ConvD2F src));
11457 format %{ "CVTSD2SS $dst,$src\t# F-round" %}
11458 ins_encode %{
11459 __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
11460 %}
11461 ins_pipe( pipe_slow );
11462 %}
11463
// x87-only F2D, register-to-register on the FPU stack.
11464 instruct convF2D_reg_reg(regD dst, regF src) %{
11465 predicate(UseSSE==0);
11466 match(Set dst (ConvF2D src));
11467 format %{ "FST_S $dst,$src\t# D-round" %}
11468 ins_encode( Pop_Reg_Reg_D(dst, src));
11469 ins_pipe( fpu_reg_reg );
11470 %}
11471 
// UseSSE==1 F2D into a double stack slot (expands to roundDouble_mem_reg).
11472 instruct convF2D_reg(stackSlotD dst, regF src) %{
11473 predicate(UseSSE==1);
11474 match(Set dst (ConvF2D src));
11475 format %{ "FST_D $dst,$src\t# D-round" %}
11476 expand %{
11477 roundDouble_mem_reg(dst,src);
11478 %}
11479 %}
11480 
// UseSSE==1 F2D: float lives in xmm but the double result must land on the
// x87 stack, so bounce through a 4-byte scratch slot on the C stack.
11481 instruct convX2D_reg(regD dst, regX src, eFlagsReg cr) %{
11482 predicate(UseSSE==1);
11483 match(Set dst (ConvF2D src));
11484 effect( KILL cr );
11485 format %{ "SUB ESP,4\n\t"
11486 "MOVSS [ESP] $src\n\t"
11487 "FLD_S [ESP]\n\t"
11488 "ADD ESP,4\n\t"
11489 "FSTP $dst\t# D-round" %}
11490 ins_encode %{
11491 __ subptr(rsp, 4);
11492 __ movflt(Address(rsp, 0), $src$$XMMRegister);
11493 __ fld_s(Address(rsp, 0));
11494 __ addptr(rsp, 4);
11495 __ fstp_d($dst$$reg);
11496 %}
11497 ins_pipe( pipe_slow );
11498 %}
11499 
// Full-SSE2 F2D: a single CVTSS2SD in xmm registers.
11500 instruct convX2XD_reg(regXD dst, regX src) %{
11501 predicate(UseSSE>=2);
11502 match(Set dst (ConvF2D src));
11503 format %{ "CVTSS2SD $dst,$src\t# D-round" %}
11504 ins_encode %{
11505 __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
11506 %}
11507 ins_pipe( pipe_slow );
11508 %}
11509
11510 // Convert a double to an int. If the double is a NAN, stuff a zero in instead.
// x87 path: truncate via FLDCW/FISTP, then compare against 0x80000000 (the
// value FISTP stores on overflow/NaN) and call the d2i_wrapper stub for the
// slow corner cases. Encoding details live in D2I_encoding.
11511 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
11512 predicate(UseSSE<=1);
11513 match(Set dst (ConvD2I src));
11514 effect( KILL tmp, KILL cr );
11515 format %{ "FLD $src\t# Convert double to int \n\t"
11516 "FLDCW trunc mode\n\t"
11517 "SUB ESP,4\n\t"
11518 "FISTp [ESP + #0]\n\t"
11519 "FLDCW std/24-bit mode\n\t"
11520 "POP EAX\n\t"
11521 "CMP EAX,0x80000000\n\t"
11522 "JNE,s fast\n\t"
11523 "FLD_D $src\n\t"
11524 "CALL d2i_wrapper\n"
11525 "fast:" %}
11526 ins_encode( Push_Reg_D(src), D2I_encoding(src) );
11527 ins_pipe( pipe_slow );
11528 %}
11529 
11530 // Convert a double to an int. If the double is a NAN, stuff a zero in instead.
// SSE2 path: CVTTSD2SI produces 0x80000000 for overflow/NaN; in that case the
// double is pushed back on the x87 stack and the d2i_wrapper stub fixes it up.
11531 instruct convXD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regXD src, eFlagsReg cr ) %{
11532 predicate(UseSSE>=2);
11533 match(Set dst (ConvD2I src));
11534 effect( KILL tmp, KILL cr );
11535 format %{ "CVTTSD2SI $dst, $src\n\t"
11536 "CMP $dst,0x80000000\n\t"
11537 "JNE,s fast\n\t"
11538 "SUB ESP, 8\n\t"
11539 "MOVSD [ESP], $src\n\t"
11540 "FLD_D [ESP]\n\t"
11541 "ADD ESP, 8\n\t"
11542 "CALL d2i_wrapper\n"
11543 "fast:" %}
11544 ins_encode %{
11545 Label fast;
11546 __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
11547 __ cmpl($dst$$Register, 0x80000000);
11548 __ jccb(Assembler::notEqual, fast);
// Slow path: pass the original double to the wrapper via the x87 stack.
11549 __ subptr(rsp, 8);
11550 __ movdbl(Address(rsp, 0), $src$$XMMRegister);
11551 __ fld_d(Address(rsp, 0));
11552 __ addptr(rsp, 8);
11553 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
11554 __ bind(fast);
11555 %}
11556 ins_pipe( pipe_slow );
11557 %}
11558
11559 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
11560 predicate(UseSSE<=1);
11561 match(Set dst (ConvD2L src));
11562 effect( KILL cr );
11563 format %{ "FLD $src\t# Convert double to long\n\t"
11564 "FLDCW trunc mode\n\t"
11565 "SUB ESP,8\n\t"
11566 "FISTp [ESP + #0]\n\t"
11567 "FLDCW std/24-bit mode\n\t"
11568 "POP EAX\n\t"
11569 "POP EDX\n\t"
11570 "CMP EDX,0x80000000\n\t"
11571 "JNE,s fast\n\t"
11572 "TEST EAX,EAX\n\t"
11573 "JNE,s fast\n\t"
11574 "FLD $src\n\t"
11575 "CALL d2l_wrapper\n"
11581 // XMM lacks a float/double->long conversion, so use the old FPU stack.
// SSE2 D2L: spill the xmm double, FISTP it in truncating mode, then check for
// the 0x8000000000000000 overflow/NaN pattern in EDX:EAX and call the
// d2l_wrapper stub to fix up those corner cases.
11582 instruct convXD2L_reg_reg( eADXRegL dst, regXD src, eFlagsReg cr ) %{
11583 predicate (UseSSE>=2);
11584 match(Set dst (ConvD2L src));
11585 effect( KILL cr );
11586 format %{ "SUB ESP,8\t# Convert double to long\n\t"
11587 "MOVSD [ESP],$src\n\t"
11588 "FLD_D [ESP]\n\t"
11589 "FLDCW trunc mode\n\t"
11590 "FISTp [ESP + #0]\n\t"
11591 "FLDCW std/24-bit mode\n\t"
11592 "POP EAX\n\t"
11593 "POP EDX\n\t"
11594 "CMP EDX,0x80000000\n\t"
11595 "JNE,s fast\n\t"
11596 "TEST EAX,EAX\n\t"
11597 "JNE,s fast\n\t"
11598 "SUB ESP,8\n\t"
11599 "MOVSD [ESP],$src\n\t"
11600 "FLD_D [ESP]\n\t"
11601 "ADD ESP,8\n\t"
11602 "CALL d2l_wrapper\n"
11603 "fast:" %}
11604 ins_encode %{
11605 Label fast;
11606 __ subptr(rsp, 8);
11607 __ movdbl(Address(rsp, 0), $src$$XMMRegister);
11608 __ fld_d(Address(rsp, 0));
11609 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
11610 __ fistp_d(Address(rsp, 0));
11611 // Restore the rounding mode, mask the exception
11612 if (Compile::current()->in_24_bit_fp_mode()) {
11613 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
11614 } else {
11615 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
11616 }
11617 // Load the converted long, adjust CPU stack
11618 __ pop(rax);
11619 __ pop(rdx);
11620 __ cmpl(rdx, 0x80000000);
11621 __ jccb(Assembler::notEqual, fast);
11622 __ testl(rax, rax);
11623 __ jccb(Assembler::notEqual, fast);
// Result was exactly 0x8000000000000000: possible overflow/NaN, so re-present
// the double on the x87 stack and let the d2l_wrapper stub decide.
11624 __ subptr(rsp, 8);
11625 __ movdbl(Address(rsp, 0), $src$$XMMRegister);
11626 __ fld_d(Address(rsp, 0));
11627 __ addptr(rsp, 8);
11628 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
11629 __ bind(fast);
11630 %}
11631 ins_pipe( pipe_slow );
11632 %}
11633
11634 // Convert a float to an int. Java semantics require we do complex
11635 // manglations in the corner cases. So we set the rounding mode to
11636 // 'zero', store the darned float down as an int, and reset the
11637 // rounding mode to 'nearest'. The hardware stores a flag value down
11638 // if we would overflow or converted a NaN; we check for this
11639 // and go the slow path if needed.
11640 instruct convF2I_reg_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
11641 predicate(UseSSE==0);
11642 match(Set dst (ConvF2I src));
11643 effect( KILL tmp, KILL cr );
11644 format %{ "FLD $src\t# Convert float to int \n\t"
11645 "FLDCW trunc mode\n\t"
11646 "SUB ESP,4\n\t"
11647 "FISTp [ESP + #0]\n\t"
11648 "FLDCW std/24-bit mode\n\t"
11649 "POP EAX\n\t"
11650 "CMP EAX,0x80000000\n\t"
11654 "fast:" %}
11655 // D2I_encoding works for F2I
11656 ins_encode( Push_Reg_F(src), D2I_encoding(src) );
11657 ins_pipe( pipe_slow );
11658 %}
11659
11660 // Convert a float in xmm to an int reg.
// CVTTSS2SI yields 0x80000000 on overflow/NaN; in that case the float is
// re-presented on the x87 stack and the d2i_wrapper stub computes the
// Java-correct result.
11661 instruct convX2I_reg(eAXRegI dst, eDXRegI tmp, regX src, eFlagsReg cr ) %{
11662 predicate(UseSSE>=1);
11663 match(Set dst (ConvF2I src));
11664 effect( KILL tmp, KILL cr );
11665 format %{ "CVTTSS2SI $dst, $src\n\t"
11666 "CMP $dst,0x80000000\n\t"
11667 "JNE,s fast\n\t"
11668 "SUB ESP, 4\n\t"
11669 "MOVSS [ESP], $src\n\t"
11670 "FLD [ESP]\n\t"
11671 "ADD ESP, 4\n\t"
11672 "CALL d2i_wrapper\n"
11673 "fast:" %}
11674 ins_encode %{
11675 Label fast;
11676 __ cvttss2sil($dst$$Register, $src$$XMMRegister);
11677 __ cmpl($dst$$Register, 0x80000000);
11678 __ jccb(Assembler::notEqual, fast);
11679 __ subptr(rsp, 4);
11680 __ movflt(Address(rsp, 0), $src$$XMMRegister);
11681 __ fld_s(Address(rsp, 0));
11682 __ addptr(rsp, 4);
// d2i_wrapper also handles the float case: the operand is passed widened on
// the x87 stack.
11683 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
11684 __ bind(fast);
11685 %}
11686 ins_pipe( pipe_slow );
11687 %}
11688
11689 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
11690 predicate(UseSSE==0);
11691 match(Set dst (ConvF2L src));
11692 effect( KILL cr );
11693 format %{ "FLD $src\t# Convert float to long\n\t"
11694 "FLDCW trunc mode\n\t"
11695 "SUB ESP,8\n\t"
11696 "FISTp [ESP + #0]\n\t"
11697 "FLDCW std/24-bit mode\n\t"
11698 "POP EAX\n\t"
11699 "POP EDX\n\t"
11700 "CMP EDX,0x80000000\n\t"
11701 "JNE,s fast\n\t"
11702 "TEST EAX,EAX\n\t"
11703 "JNE,s fast\n\t"
11704 "FLD $src\n\t"
11705 "CALL d2l_wrapper\n"
11715 match(Set dst (ConvF2L src));
11716 effect( KILL cr );
11717 format %{ "SUB ESP,8\t# Convert float to long\n\t"
11718 "MOVSS [ESP],$src\n\t"
11719 "FLD_S [ESP]\n\t"
11720 "FLDCW trunc mode\n\t"
11721 "FISTp [ESP + #0]\n\t"
11722 "FLDCW std/24-bit mode\n\t"
11723 "POP EAX\n\t"
11724 "POP EDX\n\t"
11725 "CMP EDX,0x80000000\n\t"
11726 "JNE,s fast\n\t"
11727 "TEST EAX,EAX\n\t"
11728 "JNE,s fast\n\t"
11729 "SUB ESP,4\t# Convert float to long\n\t"
11730 "MOVSS [ESP],$src\n\t"
11731 "FLD_S [ESP]\n\t"
11732 "ADD ESP,4\n\t"
11733 "CALL d2l_wrapper\n"
11734 "fast:" %}
11735 ins_encode %{
11736 Label fast;
11737 __ subptr(rsp, 8);
11738 __ movflt(Address(rsp, 0), $src$$XMMRegister);
11739 __ fld_s(Address(rsp, 0));
11740 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
11741 __ fistp_d(Address(rsp, 0));
11742 // Restore the rounding mode, mask the exception
11743 if (Compile::current()->in_24_bit_fp_mode()) {
11744 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
11745 } else {
11746 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
11747 }
11748 // Load the converted long, adjust CPU stack
11749 __ pop(rax);
11750 __ pop(rdx);
11751 __ cmpl(rdx, 0x80000000);
11752 __ jccb(Assembler::notEqual, fast);
11753 __ testl(rax, rax);
11754 __ jccb(Assembler::notEqual, fast);
11755 __ subptr(rsp, 4);
11756 __ movflt(Address(rsp, 0), $src$$XMMRegister);
11757 __ fld_s(Address(rsp, 0));
11758 __ addptr(rsp, 4);
11759 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
11760 __ bind(fast);
11761 %}
11762 ins_pipe( pipe_slow );
11763 %}
11764
// x87 I2D: FILD the int from its stack slot, FSTP to the destination FPU reg.
11765 instruct convI2D_reg(regD dst, stackSlotI src) %{
11766 predicate( UseSSE<=1 );
11767 match(Set dst (ConvI2D src));
11768 format %{ "FILD $src\n\t"
11769 "FSTP $dst" %}
11770 opcode(0xDB, 0x0); /* DB /0 */
11771 ins_encode(Push_Mem_I(src), Pop_Reg_D(dst));
11772 ins_pipe( fpu_reg_mem );
11773 %}
11774 
// SSE2 I2D from a GP register via CVTSI2SD (when the XMM-only variant below
// is not selected by UseXmmI2D).
11775 instruct convI2XD_reg(regXD dst, eRegI src) %{
11776 predicate( UseSSE>=2 && !UseXmmI2D );
11777 match(Set dst (ConvI2D src));
11778 format %{ "CVTSI2SD $dst,$src" %}
11779 ins_encode %{
11780 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11781 %}
11782 ins_pipe( pipe_slow );
11783 %}
11784 
// SSE2 I2D with the int loaded straight from memory (cisc-alternate).
11785 instruct convI2XD_mem(regXD dst, memory mem) %{
11786 predicate( UseSSE>=2 );
11787 match(Set dst (ConvI2D (LoadI mem)));
11788 format %{ "CVTSI2SD $dst,$mem" %}
11789 ins_encode %{
11790 __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
11791 %}
11792 ins_pipe( pipe_slow );
11793 %}
11794 
// SSE2 I2D staying in the xmm domain: MOVD the int in, then CVTDQ2PD
// (selected by UseXmmI2D).
11795 instruct convXI2XD_reg(regXD dst, eRegI src)
11796 %{
11797 predicate( UseSSE>=2 && UseXmmI2D );
11798 match(Set dst (ConvI2D src));
11799 
11800 format %{ "MOVD $dst,$src\n\t"
11801 "CVTDQ2PD $dst,$dst\t# i2d" %}
11802 ins_encode %{
11803 __ movdl($dst$$XMMRegister, $src$$Register);
11804 __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11805 %}
11806 ins_pipe(pipe_slow); // XXX
11807 %}
11808
11809 instruct convI2D_mem(regD dst, memory mem) %{
11810 predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11811 match(Set dst (ConvI2D (LoadI mem)));
11867 ins_pipe( fpu_reg_mem );
11868 %}
11869
11870 // This instruction does not round to 24-bits
// x87 I2F with the int loaded straight from memory; only legal when the
// compilation is not in 24-bit rounding mode.
11871 instruct convI2F_mem(regF dst, memory mem) %{
11872 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11873 match(Set dst (ConvI2F (LoadI mem)));
11874 format %{ "FILD $mem\n\t"
11875 "FSTP $dst" %}
11876 opcode(0xDB); /* DB /0 */
11877 ins_encode( OpcP, RMopc_Mem(0x00,mem),
11878 Pop_Reg_F(dst));
11879 ins_pipe( fpu_reg_mem );
11880 %}
11881
11882 // Convert an int to a float in xmm; no rounding step needed.
11883 instruct convI2X_reg(regX dst, eRegI src) %{
11884 predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
11885 match(Set dst (ConvI2F src));
11886 format %{ "CVTSI2SS $dst, $src" %}
11887 ins_encode %{
11888 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11889 %}
11890 ins_pipe( pipe_slow );
11891 %}
11892 
// SSE2 I2F staying in the xmm domain: MOVD the int in, then CVTDQ2PS
// (selected by UseXmmI2F).
11893 instruct convXI2X_reg(regX dst, eRegI src)
11894 %{
11895 predicate( UseSSE>=2 && UseXmmI2F );
11896 match(Set dst (ConvI2F src));
11897 
11898 format %{ "MOVD $dst,$src\n\t"
11899 "CVTDQ2PS $dst,$dst\t# i2f" %}
11900 ins_encode %{
11901 __ movdl($dst$$XMMRegister, $src$$Register);
11902 __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11903 %}
11904 ins_pipe(pipe_slow); // XXX
11905 %}
11906
11907 instruct convI2L_reg( eRegL dst, eRegI src, eFlagsReg cr) %{
11908 match(Set dst (ConvI2L src));
11909 effect(KILL cr);
11993 "FSTP_S $dst\t# F-round" %}
11994 opcode(0xDF, 0x5); /* DF /5 */
11995 ins_encode(convert_long_double(src), Pop_Mem_F(dst));
11996 ins_pipe( pipe_slow );
11997 %}
11998
// L2I: just copy the low 32-bit half of the long register pair.
11999 instruct convL2I_reg( eRegI dst, eRegL src ) %{
12000 match(Set dst (ConvL2I src));
12001 effect( DEF dst, USE src );
12002 format %{ "MOV $dst,$src.lo" %}
12003 ins_encode(enc_CopyL_Lo(dst,src));
12004 ins_pipe( ialu_reg_reg );
12005 %}
12006
12007
// Raw bit move of a float stack slot into a GP register (Float.floatToRawIntBits
// style reinterpretation; plain 32-bit load, no conversion).
12008 instruct MoveF2I_stack_reg(eRegI dst, stackSlotF src) %{
12009 match(Set dst (MoveF2I src));
12010 effect( DEF dst, USE src );
12011 ins_cost(100);
12012 format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
12013 ins_encode %{
12014 __ movl($dst$$Register, Address(rsp, $src$$disp));
12015 %}
12016 ins_pipe( ialu_reg_mem );
12017 %}
12018 
// Raw bit move of an x87 float register into an int stack slot (no SSE).
12019 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
12020 predicate(UseSSE==0);
12021 match(Set dst (MoveF2I src));
12022 effect( DEF dst, USE src );
12023 
12024 ins_cost(125);
12025 format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
12026 ins_encode( Pop_Mem_Reg_F(dst, src) );
12027 ins_pipe( fpu_mem_reg );
12028 %}
12029 
// Raw bit move of an xmm float into an int stack slot.
12030 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regX src) %{
12031 predicate(UseSSE>=1);
12032 match(Set dst (MoveF2I src));
12033 effect( DEF dst, USE src );
12034 
12035 ins_cost(95);
12036 format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
12037 ins_encode %{
12038 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
12039 %}
12040 ins_pipe( pipe_slow );
12041 %}
12042 
// Raw bit move xmm -> GP register directly via MOVD (cheapest form, SSE2).
12043 instruct MoveF2I_reg_reg_sse(eRegI dst, regX src) %{
12044 predicate(UseSSE>=2);
12045 match(Set dst (MoveF2I src));
12046 effect( DEF dst, USE src );
12047 ins_cost(85);
12048 format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
12049 ins_encode %{
12050 __ movdl($dst$$Register, $src$$XMMRegister);
12051 %}
12052 ins_pipe( pipe_slow );
12053 %}
12054
// Raw bit move of a GP register into a float stack slot (intBitsToFloat style;
// plain 32-bit store, no conversion).
12055 instruct MoveI2F_reg_stack(stackSlotF dst, eRegI src) %{
12056 match(Set dst (MoveI2F src));
12057 effect( DEF dst, USE src );
12058 
12059 ins_cost(100);
12060 format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %}
12061 ins_encode %{
12062 __ movl(Address(rsp, $dst$$disp), $src$$Register);
12063 %}
12064 ins_pipe( ialu_mem_reg );
12065 %}
12066 
12067 
// Raw bit move of an int stack slot into an x87 float register (no SSE).
12068 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
12069 predicate(UseSSE==0);
12070 match(Set dst (MoveI2F src));
12071 effect(DEF dst, USE src);
12072 
12073 ins_cost(125);
12074 format %{ "FLD_S $src\n\t"
12075 "FSTP $dst\t# MoveI2F_stack_reg" %}
12076 opcode(0xD9); /* D9 /0, FLD m32real */
12077 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
12078 Pop_Reg_F(dst) );
12079 ins_pipe( fpu_reg_mem );
12080 %}
12081 
// Raw bit move of an int stack slot into an xmm register.
12082 instruct MoveI2F_stack_reg_sse(regX dst, stackSlotI src) %{
12083 predicate(UseSSE>=1);
12084 match(Set dst (MoveI2F src));
12085 effect( DEF dst, USE src );
12086 
12087 ins_cost(95);
12088 format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
12089 ins_encode %{
12090 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
12091 %}
12092 ins_pipe( pipe_slow );
12093 %}
12094 
// Raw bit move GP register -> xmm directly via MOVD (cheapest form, SSE2).
12095 instruct MoveI2F_reg_reg_sse(regX dst, eRegI src) %{
12096 predicate(UseSSE>=2);
12097 match(Set dst (MoveI2F src));
12098 effect( DEF dst, USE src );
12099 
12100 ins_cost(85);
12101 format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
12102 ins_encode %{
12103 __ movdl($dst$$XMMRegister, $src$$Register);
12104 %}
12105 ins_pipe( pipe_slow );
12106 %}
12107
// Raw bit move of a double stack slot into a long register pair
// (doubleToRawLongBits style; two 32-bit loads for lo/hi halves).
12108 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
12109 match(Set dst (MoveD2L src));
12110 effect(DEF dst, USE src);
12111 
12112 ins_cost(250);
12113 format %{ "MOV $dst.lo,$src\n\t"
12114 "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
12115 opcode(0x8B, 0x8B);
12116 ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
12117 ins_pipe( ialu_mem_long_reg );
12118 %}
12119 
// Raw bit move of an x87 double register into a long stack slot (UseSSE<=1).
12120 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
12121 predicate(UseSSE<=1);
12122 match(Set dst (MoveD2L src));
12123 effect(DEF dst, USE src);
12124 
12125 ins_cost(125);
12126 format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
12127 ins_encode( Pop_Mem_Reg_D(dst, src) );
12128 ins_pipe( fpu_mem_reg );
12129 %}
12130 
// Raw bit move of an xmm double into a long stack slot (SSE2).
12131 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regXD src) %{
12132 predicate(UseSSE>=2);
12133 match(Set dst (MoveD2L src));
12134 effect(DEF dst, USE src);
12135 ins_cost(95);
12136 format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
12137 ins_encode %{
12138 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
12139 %}
12140 ins_pipe( pipe_slow );
12141 %}
12142 
// Raw bit move xmm double -> long register pair without touching memory:
// MOVD the low word, swap halves with PSHUFLW (0x4e = words 2,3,0,1) into a
// temp, MOVD the high word.
12143 instruct MoveD2L_reg_reg_sse(eRegL dst, regXD src, regXD tmp) %{
12144 predicate(UseSSE>=2);
12145 match(Set dst (MoveD2L src));
12146 effect(DEF dst, USE src, TEMP tmp);
12147 ins_cost(85);
12148 format %{ "MOVD $dst.lo,$src\n\t"
12149 "PSHUFLW $tmp,$src,0x4E\n\t"
12150 "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
12151 ins_encode %{
12152 __ movdl($dst$$Register, $src$$XMMRegister);
12153 __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
12154 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
12155 %}
12156 ins_pipe( pipe_slow );
12157 %}
12158
// Raw bit move of a long register pair into a double stack slot
// (longBitsToDouble style; two 32-bit stores for lo/hi halves).
12159 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
12160 match(Set dst (MoveL2D src));
12161 effect(DEF dst, USE src);
12162 
12163 ins_cost(200);
12164 format %{ "MOV $dst,$src.lo\n\t"
12165 "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
12166 opcode(0x89, 0x89);
12167 ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
12168 ins_pipe( ialu_mem_long_reg );
12169 %}
12170 
12171 
// Raw bit move of a long stack slot into an x87 double register (UseSSE<=1).
12172 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
12173 predicate(UseSSE<=1);
12174 match(Set dst (MoveL2D src));
12175 effect(DEF dst, USE src);
12176 ins_cost(125);
12177 
12178 format %{ "FLD_D $src\n\t"
12179 "FSTP $dst\t# MoveL2D_stack_reg" %}
12180 opcode(0xDD); /* DD /0, FLD m64real */
12181 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
12182 Pop_Reg_D(dst) );
12183 ins_pipe( fpu_reg_mem );
12184 %}
12185 
12186 
// Raw bit move long stack slot -> xmm with MOVSD, which also clears the upper
// half of the register (selected by UseXmmLoadAndClearUpper).
12187 instruct MoveL2D_stack_reg_sse(regXD dst, stackSlotL src) %{
12188 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
12189 match(Set dst (MoveL2D src));
12190 effect(DEF dst, USE src);
12191 
12192 ins_cost(95);
12193 format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %}
12194 ins_encode %{
12195 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
12196 %}
12197 ins_pipe( pipe_slow );
12198 %}
12199 
// Partial-register variant for CPUs where MOVSD's upper-clear is not desired
// (!UseXmmLoadAndClearUpper). NOTE(review): the format tag still prints
// "MoveL2D_stack_reg_sse" (MOVLPD mnemonic) — debug output only.
12200 instruct MoveL2D_stack_reg_sse_partial(regXD dst, stackSlotL src) %{
12201 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
12202 match(Set dst (MoveL2D src));
12203 effect(DEF dst, USE src);
12204 
12205 ins_cost(95);
12206 format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
12207 ins_encode %{
12208 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
12209 %}
12210 ins_pipe( pipe_slow );
12211 %}
12212 
// Raw bit move long register pair -> xmm without touching memory: MOVD each
// half, then PUNPCKLDQ to interleave them into the low 64 bits.
12213 instruct MoveL2D_reg_reg_sse(regXD dst, eRegL src, regXD tmp) %{
12214 predicate(UseSSE>=2);
12215 match(Set dst (MoveL2D src));
12216 effect(TEMP dst, USE src, TEMP tmp);
12217 ins_cost(85);
12218 format %{ "MOVD $dst,$src.lo\n\t"
12219 "MOVD $tmp,$src.hi\n\t"
12220 "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
12221 ins_encode %{
12222 __ movdl($dst$$XMMRegister, $src$$Register);
12223 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
12224 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
12225 %}
12226 ins_pipe( pipe_slow );
12227 %}
12228
12229 // Replicate scalar to packed byte (1 byte) values in xmm
// Widen the byte to words with PUNPCKLBW (pairing it with itself), then
// broadcast the low word across the low 64 bits with PSHUFLW.
12230 instruct Repl8B_reg(regXD dst, regXD src) %{
12231 predicate(UseSSE>=2);
12232 match(Set dst (Replicate8B src));
12233 format %{ "MOVDQA $dst,$src\n\t"
12234 "PUNPCKLBW $dst,$dst\n\t"
12235 "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
12236 ins_encode %{
// Skip the copy when dst and src were allocated to the same xmm register.
12237 if ($dst$$reg != $src$$reg) {
12238 __ movdqa($dst$$XMMRegister, $src$$XMMRegister);
12239 }
12240 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
12241 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
12242 %}
12243 ins_pipe( pipe_slow );
12244 %}
12245 
12246 // Replicate scalar to packed byte (1 byte) values in xmm
// Same as Repl8B_reg but the scalar starts in a GP register (MOVD it in first).
12247 instruct Repl8B_eRegI(regXD dst, eRegI src) %{
12248 predicate(UseSSE>=2);
12249 match(Set dst (Replicate8B src));
12250 format %{ "MOVD $dst,$src\n\t"
12251 "PUNPCKLBW $dst,$dst\n\t"
12252 "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
12253 ins_encode %{
12254 __ movdl($dst$$XMMRegister, $src$$Register);
12255 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
12256 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
12257 %}
12258 ins_pipe( pipe_slow );
12259 %}
12260 
12261 // Replicate scalar zero to packed byte (1 byte) values in xmm
// Zero vector: PXOR dst with itself.
12262 instruct Repl8B_immI0(regXD dst, immI0 zero) %{
12263 predicate(UseSSE>=2);
12264 match(Set dst (Replicate8B zero));
12265 format %{ "PXOR $dst,$dst\t! replicate8B" %}
12266 ins_encode %{
12267 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
12268 %}
12269 ins_pipe( fpu_reg_reg );
12270 %}
12271
12272 // Replicate scalar to packed short (2 byte) values in xmm
12273 instruct Repl4S_reg(regXD dst, regXD src) %{
12274 predicate(UseSSE>=2);
12275 match(Set dst (Replicate4S src));
12276 format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
12277 ins_encode %{
// Shuffle immediate 0x00 broadcasts word 0 across the low 4 words.
12278 __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00);
12279 %}
12280 ins_pipe( fpu_reg_reg );
12281 %}
12282 
12283 // Replicate scalar to packed short (2 byte) values in xmm
// Same as Repl4S_reg but the scalar starts in a GP register (MOVD it in first).
12284 instruct Repl4S_eRegI(regXD dst, eRegI src) %{
12285 predicate(UseSSE>=2);
12286 match(Set dst (Replicate4S src));
12287 format %{ "MOVD $dst,$src\n\t"
12288 "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
12289 ins_encode %{
12290 __ movdl($dst$$XMMRegister, $src$$Register);
12291 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
12292 %}
12293 ins_pipe( fpu_reg_reg );
12294 %}
12295 
12296 // Replicate scalar zero to packed short (2 byte) values in xmm
12297 instruct Repl4S_immI0(regXD dst, immI0 zero) %{
12298 predicate(UseSSE>=2);
12299 match(Set dst (Replicate4S zero));
12300 format %{ "PXOR $dst,$dst\t! replicate4S" %}
12301 ins_encode %{
12302 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
12303 %}
12304 ins_pipe( fpu_reg_reg );
12305 %}
12306
12307 // Replicate scalar to packed char (2 byte) values in xmm
// Identical codegen to Repl4S; chars are 16-bit too.
12308 instruct Repl4C_reg(regXD dst, regXD src) %{
12309 predicate(UseSSE>=2);
12310 match(Set dst (Replicate4C src));
12311 format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
12312 ins_encode %{
12313 __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00);
12314 %}
12315 ins_pipe( fpu_reg_reg );
12316 %}
12317 
12318 // Replicate scalar to packed char (2 byte) values in xmm
// Same but the scalar starts in a GP register (MOVD it in first).
12319 instruct Repl4C_eRegI(regXD dst, eRegI src) %{
12320 predicate(UseSSE>=2);
12321 match(Set dst (Replicate4C src));
12322 format %{ "MOVD $dst,$src\n\t"
12323 "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
12324 ins_encode %{
12325 __ movdl($dst$$XMMRegister, $src$$Register);
12326 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
12327 %}
12328 ins_pipe( fpu_reg_reg );
12329 %}
12330 
12331 // Replicate scalar zero to packed char (2 byte) values in xmm
12332 instruct Repl4C_immI0(regXD dst, immI0 zero) %{
12333 predicate(UseSSE>=2);
12334 match(Set dst (Replicate4C zero));
12335 format %{ "PXOR $dst,$dst\t! replicate4C" %}
12336 ins_encode %{
12337 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
12338 %}
12339 ins_pipe( fpu_reg_reg );
12340 %}
12341
12342 // Replicate scalar to packed integer (4 byte) values in xmm
12343 instruct Repl2I_reg(regXD dst, regXD src) %{
12344 predicate(UseSSE>=2);
12345 match(Set dst (Replicate2I src));
12346 format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
12347 ins_encode %{
// Shuffle immediate 0x00 broadcasts dword 0 to all four dword lanes.
12348 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
12349 %}
12350 ins_pipe( fpu_reg_reg );
12351 %}
12352 
12353 // Replicate scalar to packed integer (4 byte) values in xmm
// Same but the scalar starts in a GP register (MOVD it in first).
12354 instruct Repl2I_eRegI(regXD dst, eRegI src) %{
12355 predicate(UseSSE>=2);
12356 match(Set dst (Replicate2I src));
12357 format %{ "MOVD $dst,$src\n\t"
12358 "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
12359 ins_encode %{
12360 __ movdl($dst$$XMMRegister, $src$$Register);
12361 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
12362 %}
12363 ins_pipe( fpu_reg_reg );
12364 %}
12365 
12366 // Replicate scalar zero to packed integer (4 byte) values in xmm
12367 instruct Repl2I_immI0(regXD dst, immI0 zero) %{
12368 predicate(UseSSE>=2);
12369 match(Set dst (Replicate2I zero));
12370 format %{ "PXOR $dst,$dst\t! replicate2I" %}
12371 ins_encode %{
12372 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
12373 %}
12374 ins_pipe( fpu_reg_reg );
12375 %}
12376
12377 // Replicate scalar to packed single precision floating point values in xmm
// Shuffle immediate 0xe0 = lanes (0,0,2,3): dwords 0 and 1 both get element 0,
// replicating the float across the low 64 bits.
12378 instruct Repl2F_reg(regXD dst, regXD src) %{
12379 predicate(UseSSE>=2);
12380 match(Set dst (Replicate2F src));
12381 format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
12382 ins_encode %{
12383 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0);
12384 %}
12385 ins_pipe( fpu_reg_reg );
12386 %}
12387 
12388 // Replicate scalar to packed single precision floating point values in xmm
// Same codegen, but the source is a single-float register class (regX).
12389 instruct Repl2F_regX(regXD dst, regX src) %{
12390 predicate(UseSSE>=2);
12391 match(Set dst (Replicate2F src));
12392 format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
12393 ins_encode %{
12394 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0);
12395 %}
12396 ins_pipe( fpu_reg_reg );
12397 %}
12398 
12399 // Replicate scalar to packed single precision floating point values in xmm
// Zero vector: PXOR dst with itself.
12400 instruct Repl2F_immXF0(regXD dst, immXF0 zero) %{
12401 predicate(UseSSE>=2);
12402 match(Set dst (Replicate2F zero));
12403 format %{ "PXOR $dst,$dst\t! replicate2F" %}
12404 ins_encode %{
12405 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
12406 %}
12407 ins_pipe( fpu_reg_reg );
12408 %}
12409
12410 // =======================================================================
12411 // fast clearing of an array
// ClearArray: cnt arrives in doublewords (8-byte units); SHL by 1 converts to
// 4-byte words for REP STOS with EAX zeroed. Clobbers ECX, EDI, EAX and flags.
12412 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
12413 match(Set dummy (ClearArray cnt base));
12414 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
12415 format %{ "SHL ECX,1\t# Convert doublewords to words\n\t"
12416 "XOR EAX,EAX\n\t"
12417 "REP STOS\t# store EAX into [EDI++] while ECX--" %}
12418 opcode(0,0x4);
// Raw opcode bytes: D1 /4 (SHL ECX,1), 33 C0 (XOR EAX,EAX), F3 AB (REP STOSD).
12419 ins_encode( Opcode(0xD1), RegOpc(ECX),
12420 OpcRegReg(0x33,EAX,EAX),
12421 Opcode(0xF3), Opcode(0xAB) );
12422 ins_pipe( pipe_slow );
12423 %}
12424
12425 instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
12426 eAXRegI result, regXD tmp1, eFlagsReg cr) %{
|