264 return operand;
265 }
266
267 // Buffer for 128-bits masks used by SSE instructions.
// Holds four 128-bit constants plus one spare 128-bit slot; the spare slot
// lets double_quadword() return a 16-byte-aligned address inside the buffer
// (NOTE(review): alignment role inferred from the "+1" and the comment below;
// double_quadword's definition is above this chunk -- confirm).
268 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
269
270 // Static initialization during VM startup.
// Packed-IEEE bit patterns used by the Abs*/Neg* enc_classes later in this
// file: signmask clears the sign bit (abs), signflip toggles it (negate),
// in float (2x32-bit per qword) and double (64-bit) lane layouts.
271 static jlong *float_signmask_pool = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
272 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
273 static jlong *float_signflip_pool = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
274 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
275
276 // Offset hacking within calls.
277 static int pre_call_FPU_size() {
278 if (Compile::current()->in_24_bit_fp_mode())
279 return 6; // fldcw
280 return 0;
281 }
282
// Size in bytes of the reg-reg move that preserves SP for a method-handle
// invoke: optional REX prefix on 64-bit, then opcode + ModRM byte.
283 static int preserve_SP_size() {
284 return LP64_ONLY(1 +) 2; // [rex,] op, rm(reg/reg)
285 }
286
287 // !!!!! Special hack to get all type of calls to specify the byte offset
288 // from the start of the call to the point where the return address
289 // will point.
// 5 = 1-byte CALL opcode + 4-byte rel32 displacement; a method-handle invoke
// additionally emits the SP-preserving move counted by preserve_SP_size().
290 int MachCallStaticJavaNode::ret_addr_offset() {
291 int offset = 5 + pre_call_FPU_size(); // 5 bytes from start of call to where return address points
292 if (_method_handle_invoke)
293 offset += preserve_SP_size();
294 return offset;
295 }
296
// Dynamic (inline-cache) call: 10 bytes covers the MOV of the cached oop plus
// the CALL itself, optionally preceded by the 24-bit-mode FLDCW.
297 int MachCallDynamicJavaNode::ret_addr_offset() {
298 return 10 + pre_call_FPU_size(); // 10 bytes from start of call to where return address points
299 }
300
// Byte size of the FFree_Float_Stack_All sequence; -1 until it is first
// emitted (asserted below in MachCallRuntimeNode::ret_addr_offset).
301 static int sizeof_FFree_Float_Stack_All = -1;
302
303 int MachCallRuntimeNode::ret_addr_offset() {
304 assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
478 if ( displace_is_oop ) {
479 emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
480 } else {
481 emit_d32 (cbuf, displace);
482 }
483 }
484 }
485 }
486 }
487
488
489 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
490 if( dst_encoding == src_encoding ) {
491 // reg-reg copy, use an empty encoding
492 } else {
493 emit_opcode( cbuf, 0x8B );
494 emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
495 }
496 }
497
// Emit an XMM register-to-register copy via MOVDQA.  Mirrors encode_Copy
// above: a copy of a register onto itself emits nothing.
498 void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
499 if( dst_encoding == src_encoding ) {
500 // reg-reg copy, use an empty encoding
501 } else {
502 MacroAssembler _masm(&cbuf);
503
504 __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding));
505 }
506 }
507
508
509 //=============================================================================
// The constant table is reached with absolute addressing on x86_32 (see
// calculate_table_base_offset below), so this node defines no register.
510 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
511
// Offset of the table base from the address materialized for it; with
// absolute addressing the base is the table itself, so the offset is zero.
512 int Compile::ConstantTable::calculate_table_base_offset() const {
513 return 0; // absolute addressing, no offset
514 }
515
// Nothing to emit: constants are addressed absolutely, so no base-register
// setup instruction is required on this platform.
516 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
517 // Empty encoding
518 }
519
// Matches emit() above: the node emits no bytes.
520 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
521 return 0;
522 }
523
524 #ifndef PRODUCT
525 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
775 emit_opcode (*cbuf, opcode );
776 encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, false);
777 #ifndef PRODUCT
778 } else if( !do_size ) {
779 if( size != 0 ) st->print("\n\t");
780 if( opcode == 0x8B || opcode == 0x89 ) { // MOV
781 if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
782 else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
783 } else { // FLD, FST, PUSH, POP
784 st->print("%s [ESP + #%d]",op_str,offset);
785 }
786 #endif
787 }
788 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
789 return size+3+offset_size;
790 }
791
792 // Helper for XMM registers. Extra opcode bits, limited syntax.
// Emits (or sizes, or prints) an XMM load/store to/from an [ESP+offset] stack
// slot.  A register pair (reg_lo+1 == reg_hi) is a 64-bit double move
// (MOVSD / MOVLPD depending on UseXmmLoadAndClearUpper); otherwise a 32-bit
// MOVSS.  With cbuf==NULL and do_size==false it only formats to 'st'; in all
// cases it returns the accumulated size: prefix+0F+opcode+ModRM+SIB = 5 bytes
// plus 0/1/4 displacement bytes.
793 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
794 int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
795 if( cbuf ) {
796 if( reg_lo+1 == reg_hi ) { // double move?
797 if( is_load && !UseXmmLoadAndClearUpper )
798 emit_opcode(*cbuf, 0x66 ); // use 'movlpd' for load
799 else
800 emit_opcode(*cbuf, 0xF2 ); // use 'movsd' otherwise
801 } else {
802 emit_opcode(*cbuf, 0xF3 );
803 }
804 emit_opcode(*cbuf, 0x0F );
805 if( reg_lo+1 == reg_hi && is_load && !UseXmmLoadAndClearUpper )
806 emit_opcode(*cbuf, 0x12 ); // use 'movlpd' for load
807 else
808 emit_opcode(*cbuf, is_load ? 0x10 : 0x11 );
809 encode_RegMem(*cbuf, Matcher::_regEncode[reg_lo], ESP_enc, 0x4, 0, offset, false);
810 #ifndef PRODUCT
811 } else if( !do_size ) {
812 if( size != 0 ) st->print("\n\t");
813 if( reg_lo+1 == reg_hi ) { // double move?
814 if( is_load ) st->print("%s %s,[ESP + #%d]",
815 UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
816 Matcher::regName[reg_lo], offset);
817 else st->print("MOVSD [ESP + #%d],%s",
818 offset, Matcher::regName[reg_lo]);
819 } else {
820 if( is_load ) st->print("MOVSS %s,[ESP + #%d]",
821 Matcher::regName[reg_lo], offset);
822 else st->print("MOVSS [ESP + #%d],%s",
823 offset, Matcher::regName[reg_lo]);
824 }
825 #endif
826 }
// disp8 fits offsets 1..127; zero disp needs no byte, larger needs disp32.
827 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
828 return size+5+offset_size;
829 }
830
831
// Emits (or sizes, or prints) an XMM register-to-register move for spills.
// With UseXmmRegToRegMoveAll it uses MOVAPS/MOVAPD (full-register copy,
// 66-prefixed for the double case); otherwise MOVSS/MOVSD.  Register pairs
// (lo+1 == hi on both sides) denote a 64-bit double value.
832 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
833 int src_hi, int dst_hi, int size, outputStream* st ) {
834 if( UseXmmRegToRegMoveAll ) {//Use movaps,movapd to move between xmm registers
835 if( cbuf ) {
836 if( (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ) {
837 emit_opcode(*cbuf, 0x66 );
838 }
839 emit_opcode(*cbuf, 0x0F );
840 emit_opcode(*cbuf, 0x28 );
841 emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] );
842 #ifndef PRODUCT
843 } else if( !do_size ) {
844 if( size != 0 ) st->print("\n\t");
845 if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
846 st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
847 } else {
848 st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
849 }
850 #endif
851 }
// MOVAPD = 66 0F 28 /r (4 bytes); MOVAPS = 0F 28 /r (3 bytes).
852 return size + ((src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 4 : 3);
853 } else {
854 if( cbuf ) {
855 emit_opcode(*cbuf, (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 0xF2 : 0xF3 );
856 emit_opcode(*cbuf, 0x0F );
857 emit_opcode(*cbuf, 0x10 );
858 emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] );
859 #ifndef PRODUCT
860 } else if( !do_size ) {
861 if( size != 0 ) st->print("\n\t");
862 if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
863 st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
864 } else {
865 st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
866 }
867 #endif
868 }
// MOVSD/MOVSS are both prefix + 0F 10 + ModRM = 4 bytes.
869 return size+4;
870 }
871 }
872
// Emits (or prints) a 32-bit GPR -> XMM move (MOVD, 66 0F 6E /r) for spills.
// NOTE(review): unlike the helpers above, this returns a fixed 4 instead of
// size+4 and never prints the "\n\t" separator -- verify callers expect that.
873 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
874 int src_hi, int dst_hi, int size, outputStream* st ) {
875 // 32-bit
876 if (cbuf) {
877 emit_opcode(*cbuf, 0x66);
878 emit_opcode(*cbuf, 0x0F);
879 emit_opcode(*cbuf, 0x6E);
880 emit_rm(*cbuf, 0x3, Matcher::_regEncode[dst_lo] & 7, Matcher::_regEncode[src_lo] & 7);
881 #ifndef PRODUCT
882 } else if (!do_size) {
883 st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
884 #endif
885 }
886 return 4;
887 }
888
889
// Emits (or prints) a 32-bit XMM -> GPR move (MOVD r/m32,xmm = 66 0F 7E /r)
// for spills; the XMM source goes in the reg field, the GPR in r/m.
// NOTE(review): like impl_movgpr2x_helper, returns a fixed 4, not size+4.
890 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
891 int src_hi, int dst_hi, int size, outputStream* st ) {
892 // 32-bit
893 if (cbuf) {
894 emit_opcode(*cbuf, 0x66);
895 emit_opcode(*cbuf, 0x0F);
896 emit_opcode(*cbuf, 0x7E);
897 emit_rm(*cbuf, 0x3, Matcher::_regEncode[src_lo] & 7, Matcher::_regEncode[dst_lo] & 7);
898 #ifndef PRODUCT
899 } else if (!do_size) {
900 st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
901 #endif
902 }
903 return 4;
904 }
905
// Emits (or prints) an integer register-to-register MOV (8B /r, 2 bytes)
// for spills, following the same cbuf / !do_size print protocol as the
// helpers above.
906 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
907 if( cbuf ) {
908 emit_opcode(*cbuf, 0x8B );
909 emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
910 #ifndef PRODUCT
911 } else if( !do_size ) {
912 if( size != 0 ) st->print("\n\t");
913 st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
914 #endif
915 }
916 return size+2;
917 }
1914 // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine
1915 // who we intended to call.
1916 cbuf.set_insts_mark();
1917 $$$emit8$primary;
1918 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1919 virtual_call_Relocation::spec(virtual_call_oop_addr), RELOC_IMM32 );
1920 %}
1921
// Call into compiled Java code through the target method's from-compiled
// entry point: an indirect CALL [EAX+disp8], where EAX holds the methodOop.
// The assert guarantees the offset fits the disp8 addressing form used here.
1922 enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL
1923 int disp = in_bytes(methodOopDesc::from_compiled_offset());
1924 assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
1925
1926 // CALL *[EAX+in_bytes(methodOopDesc::from_compiled_code_entry_point_offset())]
1927 cbuf.set_insts_mark();
1928 $$$emit8$primary;
1929 emit_rm(cbuf, 0x01, $secondary, EAX_enc ); // R/M byte
1930 emit_d8(cbuf, disp); // Displacement
1931
1932 %}
1933
// Zero a register: XOR dst,dst (33 /r with dst in both fields).
1934 enc_class Xor_Reg (eRegI dst) %{
1935 emit_opcode(cbuf, 0x33);
1936 emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
1937 %}
1938
1939 // Following encoding is no longer used, but may be restored if calling
1940 // convention changes significantly.
1941 // Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1942 //
1943 // enc_class Java_Interpreter_Call (label labl) %{ // JAVA INTERPRETER CALL
1944 // // int ic_reg = Matcher::inline_cache_reg();
1945 // // int ic_encode = Matcher::_regEncode[ic_reg];
1946 // // int imo_reg = Matcher::interpreter_method_oop_reg();
1947 // // int imo_encode = Matcher::_regEncode[imo_reg];
1948 //
1949 // // // Interpreter expects method_oop in EBX, currently a callee-saved register,
1950 // // // so we load it immediately before the call
1951 // // emit_opcode(cbuf, 0x8B); // MOV imo_reg,ic_reg # method_oop
1952 // // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1953 //
1954 // // xor rbp,ebp
1955 // emit_opcode(cbuf, 0x33);
1956 // emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1957 //
1958 // // CALL to interpreter.
1996 emit_d32(cbuf, src_con);
1997 }
1998 %}
1999
// Load the high 32 bits of a long immediate into the high half of the long
// register pair.  The "+2" presumably maps the low-half encoding to the
// high-half register (same convention as HIGH_FROM_LOW) -- confirm.
2000 enc_class LdImmL_Hi( eRegL dst, immL src) %{ // Load Immediate
2001 // Load immediate does not have a zero or sign extended version
2002 // for 8-bit immediates
2003 int dst_enc = $dst$$reg + 2;
2004 int src_con = ((julong)($src$$constant)) >> 32;
2005 if (src_con == 0) {
2006 // xor dst, dst
2007 emit_opcode(cbuf, 0x33);
2008 emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2009 } else {
// $primary + reg encodes the MOV r32,imm32 short form (B8+rd style).
2010 emit_opcode(cbuf, $primary + dst_enc);
2011 emit_d32(cbuf, src_con);
2012 }
2013 %}
2014
2015
// Move a 32-bit GPR into an XMM register: MOVD xmm,r32 (66 0F 6E /r).
2016 enc_class MovI2X_reg(regX dst, eRegI src) %{
2017 emit_opcode(cbuf, 0x66 ); // MOVD dst,src
2018 emit_opcode(cbuf, 0x0F );
2019 emit_opcode(cbuf, 0x6E );
2020 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2021 %}
2022
// Move an XMM register into a 32-bit GPR: MOVD r32,xmm (66 0F 7E /r).
// Note the operand order in the ModRM: the XMM source is the reg field.
2023 enc_class MovX2I_reg(eRegI dst, regX src) %{
2024 emit_opcode(cbuf, 0x66 ); // MOVD dst,src
2025 emit_opcode(cbuf, 0x0F );
2026 emit_opcode(cbuf, 0x7E );
2027 emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg);
2028 %}
2029
// Assemble a 64-bit long (in a GPR pair) into one XMM register: MOVD the low
// and high halves into dst and tmp, then interleave with PUNPCKLDQ so dst
// holds hi:lo as a single 64-bit lane.
2030 enc_class MovL2XD_reg(regXD dst, eRegL src, regXD tmp) %{
2031 { // MOVD $dst,$src.lo
2032 emit_opcode(cbuf,0x66);
2033 emit_opcode(cbuf,0x0F);
2034 emit_opcode(cbuf,0x6E);
2035 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2036 }
2037 { // MOVD $tmp,$src.hi
2038 emit_opcode(cbuf,0x66);
2039 emit_opcode(cbuf,0x0F);
2040 emit_opcode(cbuf,0x6E);
2041 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
2042 }
2043 { // PUNPCKLDQ $dst,$tmp
2044 emit_opcode(cbuf,0x66);
2045 emit_opcode(cbuf,0x0F);
2046 emit_opcode(cbuf,0x62);
2047 emit_rm(cbuf, 0x3, $dst$$reg, $tmp$$reg);
2048 }
2049 %}
2050
// Split a 64-bit value in an XMM register into a GPR pair: MOVD the low word
// out, swap the XMM's 16-bit words with PSHUFLW(0x4E) so the upper 32 bits
// move into the low lane of tmp, then MOVD that into the high half of dst.
2051 enc_class MovXD2L_reg(eRegL dst, regXD src, regXD tmp) %{
2052 { // MOVD $dst.lo,$src
2053 emit_opcode(cbuf,0x66);
2054 emit_opcode(cbuf,0x0F);
2055 emit_opcode(cbuf,0x7E);
2056 emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg);
2057 }
2058 { // PSHUFLW $tmp,$src,0x4E (01001110b)
2059 emit_opcode(cbuf,0xF2);
2060 emit_opcode(cbuf,0x0F);
2061 emit_opcode(cbuf,0x70);
2062 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
2063 emit_d8(cbuf, 0x4E);
2064 }
2065 { // MOVD $dst.hi,$tmp
2066 emit_opcode(cbuf,0x66);
2067 emit_opcode(cbuf,0x0F);
2068 emit_opcode(cbuf,0x7E);
2069 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg));
2070 }
2071 %}
2072
2073
2074 // Encode a reg-reg copy. If it is useless, then empty encoding.
// Thin ADL wrapper over the encode_Copy() helper defined earlier.
2075 enc_class enc_Copy( eRegI dst, eRegI src ) %{
2076 encode_Copy( cbuf, $dst$$reg, $src$$reg );
2077 %}
2078
// Copy only the low 32-bit half of a long source into an int register.
2079 enc_class enc_CopyL_Lo( eRegI dst, eRegL src ) %{
2080 encode_Copy( cbuf, $dst$$reg, $src$$reg );
2081 %}
2082
2083 // Encode xmm reg-reg copy. If it is useless, then empty encoding.
// Thin ADL wrapper over the encode_CopyXD() helper defined earlier.
2084 enc_class enc_CopyXD( RegXD dst, RegXD src ) %{
2085 encode_CopyXD( cbuf, $dst$$reg, $src$$reg );
2086 %}
2087
// Emit just the reg-reg ModRM byte; the opcode comes from the instruction.
2088 enc_class RegReg (eRegI dst, eRegI src) %{ // RegReg(Many)
2089 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2090 %}
2091
// Low halves of a long pair: primary opcode then reg-reg ModRM.
2092 enc_class RegReg_Lo(eRegL dst, eRegL src) %{ // RegReg(Many)
2093 $$$emit8$primary;
2094 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2095 %}
2096
// High halves of a long pair: secondary opcode then ModRM on the hi regs.
2097 enc_class RegReg_Hi(eRegL dst, eRegL src) %{ // RegReg(Many)
2098 $$$emit8$secondary;
2099 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2100 %}
2101
// Low-half ModRM only -- for instructions that emit their opcode elsewhere.
2102 enc_class RegReg_Lo2(eRegL dst, eRegL src) %{ // RegReg(Many)
2103 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2104 %}
2105
2106 enc_class RegReg_Hi2(eRegL dst, eRegL src) %{ // RegReg(Many)
2107 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2617
2618
// Push dst onto the x87 stack (FLD ST(i)), then -- when src is not already
// FPR1 -- rotate the stack with FINCSTP/FXCH/FDECSTP so src ends up where the
// following modify-op expects it.  Exact stack choreography is delicate;
// behavior preserved as-is.
2619 enc_class Push_Reg_Mod_D( regD dst, regD src) %{
2620 // load dst in FPR0
2621 emit_opcode( cbuf, 0xD9 );
2622 emit_d8( cbuf, 0xC0-1+$dst$$reg );
2623 if ($src$$reg != FPR1L_enc) {
2624 // fincstp
2625 emit_opcode (cbuf, 0xD9);
2626 emit_opcode (cbuf, 0xF7);
2627 // swap src with FPR1:
2628 // FXCH FPR1 with src
2629 emit_opcode(cbuf, 0xD9);
2630 emit_d8(cbuf, 0xC8-1+$src$$reg );
2631 // fdecstp
2632 emit_opcode (cbuf, 0xD9);
2633 emit_opcode (cbuf, 0xF6);
2634 }
2635 %}
2636
// Transfer two XMM doubles onto the x87 stack through an 8-byte stack slot:
// for each source, MOVSD it to [ESP] then FLD_D it.  src1 is pushed first so
// src0 ends up on top (FPR0).  The 8 bytes remain allocated on exit.
2637 enc_class Push_ModD_encoding( regXD src0, regXD src1) %{
2638 // Allocate a word
2639 emit_opcode(cbuf,0x83); // SUB ESP,8
2640 emit_opcode(cbuf,0xEC);
2641 emit_d8(cbuf,0x08);
2642
2643 emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src1
2644 emit_opcode (cbuf, 0x0F );
2645 emit_opcode (cbuf, 0x11 );
2646 encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false);
2647
2648 emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
2649 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
2650
2651 emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src0
2652 emit_opcode (cbuf, 0x0F );
2653 emit_opcode (cbuf, 0x11 );
2654 encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false);
2655
2656 emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
2657 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
2658
2659 %}
2660
// Float version of Push_ModD_encoding: bounce two XMM singles through a
// 4-byte stack slot (MOVSS + FLD_S each), src1 first so src0 is on top.
// The 4 bytes remain allocated on exit.
2661 enc_class Push_ModX_encoding( regX src0, regX src1) %{
2662 // Allocate a word
2663 emit_opcode(cbuf,0x83); // SUB ESP,4
2664 emit_opcode(cbuf,0xEC);
2665 emit_d8(cbuf,0x04);
2666
2667 emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src1
2668 emit_opcode (cbuf, 0x0F );
2669 emit_opcode (cbuf, 0x11 );
2670 encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false);
2671
2672 emit_opcode(cbuf,0xD9 ); // FLD [ESP]
2673 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
2674
2675 emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src0
2676 emit_opcode (cbuf, 0x0F );
2677 emit_opcode (cbuf, 0x11 );
2678 encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false);
2679
2680 emit_opcode(cbuf,0xD9 ); // FLD [ESP]
2681 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
2682
2683 %}
2684
// Move an x87 result into an XMM double: FSTP it to [ESP], load it back with
// MOVSD (or MOVLPD when !UseXmmLoadAndClearUpper), then free the 8-byte slot.
2685 enc_class Push_ResultXD(regXD dst) %{
2686 store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [ESP]
2687
2688 // UseXmmLoadAndClearUpper ? movsd dst,[esp] : movlpd dst,[esp]
2689 emit_opcode (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
2690 emit_opcode (cbuf, 0x0F );
2691 emit_opcode (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
2692 encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);
2693
2694 emit_opcode(cbuf,0x83); // ADD ESP,8
2695 emit_opcode(cbuf,0xC4);
2696 emit_d8(cbuf,0x08);
2697 %}
2698
// Float version of Push_ResultXD: FSTP_S to [ESP], reload with MOVSS, then
// release d8 bytes of stack (4 or 8, matching what the caller allocated).
2699 enc_class Push_ResultX(regX dst, immI d8) %{
2700 store_to_stackslot( cbuf, 0xD9, 0x03, 0 ); //FSTP_S [ESP]
2701
2702 emit_opcode (cbuf, 0xF3 ); // MOVSS dst(xmm), [ESP]
2703 emit_opcode (cbuf, 0x0F );
2704 emit_opcode (cbuf, 0x10 );
2705 encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);
2706
2707 emit_opcode(cbuf,0x83); // ADD ESP,d8 (4 or 8)
2708 emit_opcode(cbuf,0xC4);
2709 emit_d8(cbuf,$d8$$constant);
2710 %}
2711
// Push one XMM double onto the x87 stack via an 8-byte stack slot
// (MOVSD to [ESP], then FLD_D).  The slot stays allocated on exit;
// pair with pop_stack_temp_qword / Push_ResultXD to release it.
2712 enc_class Push_SrcXD(regXD src) %{
2713 // Allocate a word
2714 emit_opcode(cbuf,0x83); // SUB ESP,8
2715 emit_opcode(cbuf,0xEC);
2716 emit_d8(cbuf,0x08);
2717
2718 emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src
2719 emit_opcode (cbuf, 0x0F );
2720 emit_opcode (cbuf, 0x11 );
2721 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
2722
2723 emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
2724 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
2725 %}
2726
// Reserve an 8-byte scratch slot on the stack (SUB ESP,8).
2727 enc_class push_stack_temp_qword() %{
2728 emit_opcode(cbuf,0x83); // SUB ESP,8
2729 emit_opcode(cbuf,0xEC);
2730 emit_d8 (cbuf,0x08);
2731 %}
2732
// Release the 8-byte scratch slot reserved by push_stack_temp_qword.
2733 enc_class pop_stack_temp_qword() %{
2734 emit_opcode(cbuf,0x83); // ADD ESP,8
2735 emit_opcode(cbuf,0xC4);
2736 emit_d8 (cbuf,0x08);
2737 %}
2738
// Copy an XMM double onto the x87 stack top: MOVSD to the pre-allocated
// [ESP] scratch slot, then FLD_D it (expects push_stack_temp_qword first).
2739 enc_class push_xmm_to_fpr1( regXD xmm_src ) %{
2740 emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], xmm_src
2741 emit_opcode (cbuf, 0x0F );
2742 emit_opcode (cbuf, 0x11 );
2743 encode_RegMem(cbuf, $xmm_src$$reg, ESP_enc, 0x4, 0, 0, false);
2744
2745 emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
2746 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
2747 %}
2748
2749 // Compute X^Y using Intel's fast hardware instructions, if possible.
2750 // Otherwise return a NaN.
2751 enc_class pow_exp_core_encoding %{
2752 // FPR1 holds Y*ln2(X). Compute FPR1 = 2^(Y*ln2(X))
2753 emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xC0); // fdup = fld st(0) Q Q
2754 emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xFC); // frndint int(Q) Q
2755 emit_opcode(cbuf,0xDC); emit_opcode(cbuf,0xE9); // fsub st(1) -= st(0); int(Q) frac(Q)
2756 emit_opcode(cbuf,0xDB); // FISTP [ESP] frac(Q)
2757 emit_opcode(cbuf,0x1C);
2758 emit_d8(cbuf,0x24);
2759 emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xF0); // f2xm1 2^frac(Q)-1
2760 emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xE8); // fld1 1 2^frac(Q)-1
2761 emit_opcode(cbuf,0xDE); emit_opcode(cbuf,0xC1); // faddp 2^frac(Q)
2762 emit_opcode(cbuf,0x8B); // mov rax,[esp+0]=int(Q)
2763 encode_RegMem(cbuf, EAX_enc, ESP_enc, 0x4, 0, 0, false);
2764 emit_opcode(cbuf,0xC7); // mov rcx,0xFFFFF800 - overflow mask
2765 emit_rm(cbuf, 0x3, 0x0, ECX_enc);
2766 emit_d32(cbuf,0xFFFFF800);
2905 emit_opcode( cbuf, 0x7A );
2906 emit_d8 ( cbuf, 0x13 );
2907 // movl(dst, less_result);
2908 emit_opcode( cbuf, 0xB8 + $dst$$reg);
2909 emit_d32( cbuf, -1 );
2910 // jcc(Assembler::below, exit);
2911 emit_opcode( cbuf, 0x72 );
2912 emit_d8 ( cbuf, 0x0C );
2913 // movl(dst, equal_result);
2914 emit_opcode( cbuf, 0xB8 + $dst$$reg);
2915 emit_d32( cbuf, 0 );
2916 // jcc(Assembler::equal, exit);
2917 emit_opcode( cbuf, 0x74 );
2918 emit_d8 ( cbuf, 0x05 );
2919 // movl(dst, greater_result);
2920 emit_opcode( cbuf, 0xB8 + $dst$$reg);
2921 emit_d32( cbuf, 1 );
2922 %}
2923
2924
2925 // XMM version of CmpF_Result. Because the XMM compare
2926 // instructions set the EFLAGS directly. It becomes simpler than
2927 // the float version above.
// Turns EFLAGS from a preceding UCOMISS/UCOMISD into -1/0/+1 in dst.
// NOTE(review): dst appears to be assumed pre-zeroed by the matching
// instruct's other encoding (equal falls straight to done) -- confirm.
2928 enc_class CmpX_Result(eRegI dst) %{
2929 MacroAssembler _masm(&cbuf);
2930 Label nan, inc, done;
2931
// Parity set means unordered (NaN); treated the same as "less" below.
2932 __ jccb(Assembler::parity, nan);
2933 __ jccb(Assembler::equal, done);
2934 __ jccb(Assembler::above, inc);
2935 __ bind(nan);
2936 __ decrement(as_Register($dst$$reg)); // NO L qqq
2937 __ jmpb(done);
2938 __ bind(inc);
2939 __ increment(as_Register($dst$$reg)); // NO L qqq
2940 __ bind(done);
2941 %}
2942
2943 // Compare the longs and set flags
2944 // BROKEN! Do Not use as-is
// Compares high halves first and skips the low compare when they differ;
// flagged broken upstream (signed-comparison semantics are lost), kept only
// for reference.
2945 enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
2946 // CMP $src1.hi,$src2.hi
2947 emit_opcode( cbuf, 0x3B );
2948 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2949 // JNE,s done
2950 emit_opcode(cbuf,0x75);
2951 emit_d8(cbuf, 2 );
2952 // CMP $src1.lo,$src2.lo
2953 emit_opcode( cbuf, 0x3B );
2954 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2955 // done:
2956 %}
2957
2958 enc_class convert_int_long( regL dst, eRegI src ) %{
2959 // mov $dst.lo,$src
2960 int dst_encoding = $dst$$reg;
2961 int src_encoding = $src$$reg;
2962 encode_Copy( cbuf, dst_encoding , src_encoding );
3145 emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
3146 // CMP $tmp,$src.lo
3147 emit_opcode( cbuf, 0x3B );
3148 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
3149 // SBB $tmp,$src.hi
3150 emit_opcode( cbuf, 0x1B );
3151 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
3152 %}
3153
3154 // Sniff, sniff... smells like Gnu Superoptimizer
// Two's-complement negate of a 64-bit register pair without a branch:
// NEG hi; NEG lo; SBB hi,0 folds the borrow from the low half back in.
3155 enc_class neg_long( eRegL dst ) %{
3156 emit_opcode(cbuf,0xF7); // NEG hi
3157 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
3158 emit_opcode(cbuf,0xF7); // NEG lo
3159 emit_rm (cbuf,0x3, 0x3, $dst$$reg );
3160 emit_opcode(cbuf,0x83); // SBB hi,0
3161 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
3162 emit_d8 (cbuf,0 );
3163 %}
3164
// 64-bit load from memory into an XMM register via the assembler.
3165 enc_class movq_ld(regXD dst, memory mem) %{
3166 MacroAssembler _masm(&cbuf);
3167 __ movq($dst$$XMMRegister, $mem$$Address);
3168 %}
3169
// 64-bit store of an XMM register to memory via the assembler.
3170 enc_class movq_st(memory mem, regXD src) %{
3171 MacroAssembler _masm(&cbuf);
3172 __ movq($mem$$Address, $src$$XMMRegister);
3173 %}
3174
// Replicate the low byte of src across the low 8 bytes of dst: copy src to
// dst, widen bytes to words with PUNPCKLBW, then broadcast with PSHUFLW(0).
3175 enc_class pshufd_8x8(regX dst, regX src) %{
3176 MacroAssembler _masm(&cbuf);
3177
3178 encode_CopyXD(cbuf, $dst$$reg, $src$$reg);
3179 __ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg));
3180 __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00);
3181 %}
3182
// Broadcast the low 16-bit word of src into the low 4 words of dst.
3183 enc_class pshufd_4x16(regX dst, regX src) %{
3184 MacroAssembler _masm(&cbuf);
3185
3186 __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00);
3187 %}
3188
// Shuffle 32-bit lanes of src into dst according to the immediate mode byte.
3189 enc_class pshufd(regXD dst, regXD src, int mode) %{
3190 MacroAssembler _masm(&cbuf);
3191
3192 __ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode);
3193 %}
3194
// Bitwise XOR of two XMM registers (dst ^= src); with dst==src this zeroes.
3195 enc_class pxor(regXD dst, regXD src) %{
3196 MacroAssembler _masm(&cbuf);
3197
3198 __ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg));
3199 %}
3200
// Move a 32-bit GPR into an XMM register (MOVD) via the assembler.
3201 enc_class mov_i2x(regXD dst, eRegI src) %{
3202 MacroAssembler _masm(&cbuf);
3203
3204 __ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg));
3205 %}
3206
3207
3208 // Because the transitions from emitted code to the runtime
3209 // monitorenter/exit helper stubs are so slow it's critical that
3210 // we inline both the stack-locking fast-path and the inflated fast path.
3211 //
3212 // See also: cmpFastLock and cmpFastUnlock.
3213 //
3214 // What follows is a specialized inline transliteration of the code
3215 // in slow_enter() and slow_exit(). If we're concerned about I$ bloat
3216 // another option would be to emit TrySlowEnter and TrySlowExit methods
3217 // at startup-time. These methods would accept arguments as
3218 // (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
3219 // indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply
3220 // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
3221 // In practice, however, the # of lock sites is bounded and is usually small.
3222 // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
3223 // if the processor uses simple bimodal branch predictors keyed by EIP
3224 // Since the helper routines would be called from multiple synchronization
3225 // sites.
3226 //
3227 // An even better approach would be write "MonitorEnter()" and "MonitorExit()"
3825 emit_opcode(cbuf,0x5A); // POP EDX
3826 emit_opcode(cbuf,0x81); // CMP EDX,imm
3827 emit_d8 (cbuf,0xFA); // rdx
3828 emit_d32 (cbuf,0x80000000); // 0x80000000
3829 emit_opcode(cbuf,0x75); // JNE around_slow_call
3830 emit_d8 (cbuf,0x07+4); // Size of slow_call
3831 emit_opcode(cbuf,0x85); // TEST EAX,EAX
3832 emit_opcode(cbuf,0xC0); // 2/rax,/rax,
3833 emit_opcode(cbuf,0x75); // JNE around_slow_call
3834 emit_d8 (cbuf,0x07); // Size of slow_call
3835 // Push src onto stack slow-path
3836 emit_opcode(cbuf,0xD9 ); // FLD ST(i)
3837 emit_d8 (cbuf,0xC0-1+$src$$reg );
3838 // CALL directly to the runtime
3839 cbuf.set_insts_mark();
3840 emit_opcode(cbuf,0xE8); // Call into runtime
3841 emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3842 // Carry on here...
3843 %}
3844
// Convert an XMM float to a long: bounce src through the stack onto the x87
// stack, FISTP it as a 64-bit integer under truncating rounding, then if the
// result is the sentinel 0x80000000_00000000 (possible overflow/NaN), redo
// the conversion through the d2l_wrapper runtime stub.  The JNE offsets
// (0x13+4 / 0x13) are the hand-counted byte sizes of the slow path -- keep
// them in sync with any change below.
3845 enc_class X2L_encoding( regX src ) %{
3846 // Allocate a word
3847 emit_opcode(cbuf,0x83); // SUB ESP,8
3848 emit_opcode(cbuf,0xEC);
3849 emit_d8(cbuf,0x08);
3850
3851 emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src
3852 emit_opcode (cbuf, 0x0F );
3853 emit_opcode (cbuf, 0x11 );
3854 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
3855
3856 emit_opcode(cbuf,0xD9 ); // FLD_S [ESP]
3857 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
3858
3859 emit_opcode(cbuf,0xD9); // FLDCW trunc
3860 emit_opcode(cbuf,0x2D);
3861 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3862
3863 // Encoding assumes a double has been pushed into FPR0.
3864 // Store down the double as a long, popping the FPU stack
3865 emit_opcode(cbuf,0xDF); // FISTP [ESP]
3866 emit_opcode(cbuf,0x3C);
3867 emit_d8(cbuf,0x24);
3868
3869 // Restore the rounding mode; mask the exception
3870 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode
3871 emit_opcode(cbuf,0x2D);
3872 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3873 ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3874 : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3875
// Load the converted long into EDX:EAX; adjust CPU stack
3877 emit_opcode(cbuf,0x58); // POP EAX
3878
3879 emit_opcode(cbuf,0x5A); // POP EDX
3880
// Sentinel test: only EDX==0x80000000 && EAX==0 can mean an invalid convert.
3881 emit_opcode(cbuf,0x81); // CMP EDX,imm
3882 emit_d8 (cbuf,0xFA); // rdx
3883 emit_d32 (cbuf,0x80000000);// 0x80000000
3884
3885 emit_opcode(cbuf,0x75); // JNE around_slow_call
3886 emit_d8 (cbuf,0x13+4); // Size of slow_call
3887
3888 emit_opcode(cbuf,0x85); // TEST EAX,EAX
3889 emit_opcode(cbuf,0xC0); // 2/rax,/rax,
3890
3891 emit_opcode(cbuf,0x75); // JNE around_slow_call
3892 emit_d8 (cbuf,0x13); // Size of slow_call
3893
// Slow path: re-push src onto the x87 stack and call the runtime wrapper.
3894 // Allocate a word
3895 emit_opcode(cbuf,0x83); // SUB ESP,4
3896 emit_opcode(cbuf,0xEC);
3897 emit_d8(cbuf,0x04);
3898
3899 emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src
3900 emit_opcode (cbuf, 0x0F );
3901 emit_opcode (cbuf, 0x11 );
3902 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
3903
3904 emit_opcode(cbuf,0xD9 ); // FLD_S [ESP]
3905 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
3906
3907 emit_opcode(cbuf,0x83); // ADD ESP,4
3908 emit_opcode(cbuf,0xC4);
3909 emit_d8(cbuf,0x04);
3910
3911 // CALL directly to the runtime
3912 cbuf.set_insts_mark();
3913 emit_opcode(cbuf,0xE8); // Call into runtime
3914 emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3915 // Carry on here...
3916 %}
3917
// Double flavor of X2L_encoding above: convert an XMM double to a long via
// x87 FISTP with truncation, falling back to the d2l_wrapper runtime stub
// when the sentinel 0x80000000_00000000 result signals possible overflow/NaN.
// The JNE offsets are hand-counted slow-path byte sizes -- keep in sync.
3918 enc_class XD2L_encoding( regXD src ) %{
3919 // Allocate a word
3920 emit_opcode(cbuf,0x83); // SUB ESP,8
3921 emit_opcode(cbuf,0xEC);
3922 emit_d8(cbuf,0x08);
3923
3924 emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src
3925 emit_opcode (cbuf, 0x0F );
3926 emit_opcode (cbuf, 0x11 );
3927 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
3928
3929 emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
3930 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
3931
3932 emit_opcode(cbuf,0xD9); // FLDCW trunc
3933 emit_opcode(cbuf,0x2D);
3934 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3935
3936 // Encoding assumes a double has been pushed into FPR0.
3937 // Store down the double as a long, popping the FPU stack
3938 emit_opcode(cbuf,0xDF); // FISTP [ESP]
3939 emit_opcode(cbuf,0x3C);
3940 emit_d8(cbuf,0x24);
3941
3942 // Restore the rounding mode; mask the exception
3943 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode
3944 emit_opcode(cbuf,0x2D);
3945 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3946 ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3947 : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3948
// Load the converted long into EDX:EAX; adjust CPU stack
3950 emit_opcode(cbuf,0x58); // POP EAX
3951
3952 emit_opcode(cbuf,0x5A); // POP EDX
3953
3954 emit_opcode(cbuf,0x81); // CMP EDX,imm
3955 emit_d8 (cbuf,0xFA); // rdx
3956 emit_d32 (cbuf,0x80000000); // 0x80000000
3957
3958 emit_opcode(cbuf,0x75); // JNE around_slow_call
3959 emit_d8 (cbuf,0x13+4); // Size of slow_call
3960
3961 emit_opcode(cbuf,0x85); // TEST EAX,EAX
3962 emit_opcode(cbuf,0xC0); // 2/rax,/rax,
3963
3964 emit_opcode(cbuf,0x75); // JNE around_slow_call
3965 emit_d8 (cbuf,0x13); // Size of slow_call
3966
3967 // Push src onto stack slow-path
3968 // Allocate a word
3969 emit_opcode(cbuf,0x83); // SUB ESP,8
3970 emit_opcode(cbuf,0xEC);
3971 emit_d8(cbuf,0x08);
3972
3973 emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src
3974 emit_opcode (cbuf, 0x0F );
3975 emit_opcode (cbuf, 0x11 );
3976 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
3977
3978 emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
3979 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
3980
3981 emit_opcode(cbuf,0x83); // ADD ESP,8
3982 emit_opcode(cbuf,0xC4);
3983 emit_d8(cbuf,0x08);
3984
3985 // CALL directly to the runtime
3986 cbuf.set_insts_mark();
3987 emit_opcode(cbuf,0xE8); // Call into runtime
3988 emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3989 // Carry on here...
3990 %}
3991
// Convert an x87 double register to an XMM float through a 4-byte stack
// slot: FLD src when it is not already on top (then FSTP instead of FST),
// store single-precision to [ESP], MOVSS it into dst, and free the slot.
3992 enc_class D2X_encoding( regX dst, regD src ) %{
3993 // Allocate a word
3994 emit_opcode(cbuf,0x83); // SUB ESP,4
3995 emit_opcode(cbuf,0xEC);
3996 emit_d8(cbuf,0x04);
// pop=0x02 encodes FST_S (leave stack), 0x03 FSTP_S (pop the extra FLD).
3997 int pop = 0x02;
3998 if ($src$$reg != FPR1L_enc) {
3999 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1)
4000 emit_d8( cbuf, 0xC0-1+$src$$reg );
4001 pop = 0x03;
4002 }
4003 store_to_stackslot( cbuf, 0xD9, pop, 0 ); // FST<P>_S [ESP]
4004
4005 emit_opcode (cbuf, 0xF3 ); // MOVSS dst(xmm), [ESP]
4006 emit_opcode (cbuf, 0x0F );
4007 emit_opcode (cbuf, 0x10 );
4008 encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);
4009
4010 emit_opcode(cbuf,0x83); // ADD ESP,4
4011 emit_opcode(cbuf,0xC4);
4012 emit_d8(cbuf,0x04);
4013 // Carry on here...
4014 %}
4015
// Finish an XMM float/double -> int conversion.  The leading emit_rm
// completes a CVTT* instruction whose opcode bytes are presumably emitted by
// the instruct's preceding encoding ($primary selects the double variant) --
// confirm against the ins_encode that uses this.  If the result is the
// sentinel 0x80000000, redo the conversion via the d2i_wrapper stub.
4016 enc_class FX2I_encoding( regX src, eRegI dst ) %{
4017 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
4018
4019 // Compare the result to see if we need to go to the slow path
4020 emit_opcode(cbuf,0x81); // CMP dst,imm
4021 emit_rm (cbuf,0x3,0x7,$dst$$reg);
4022 emit_d32 (cbuf,0x80000000); // 0x80000000
4023
4024 emit_opcode(cbuf,0x75); // JNE around_slow_call
4025 emit_d8 (cbuf,0x13); // Size of slow_call
4026 // Store xmm to a temp memory
4027 // location and push it onto stack.
4028
// $primary distinguishes double (8-byte slot, MOVSD/FLD_D) from float.
4029 emit_opcode(cbuf,0x83); // SUB ESP,4
4030 emit_opcode(cbuf,0xEC);
4031 emit_d8(cbuf, $primary ? 0x8 : 0x4);
4032
4033 emit_opcode (cbuf, $primary ? 0xF2 : 0xF3 ); // MOVSS [ESP], xmm
4034 emit_opcode (cbuf, 0x0F );
4035 emit_opcode (cbuf, 0x11 );
4036 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
4037
4038 emit_opcode(cbuf, $primary ? 0xDD : 0xD9 ); // FLD [ESP]
4039 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
4040
4041 emit_opcode(cbuf,0x83); // ADD ESP,4
4042 emit_opcode(cbuf,0xC4);
4043 emit_d8(cbuf, $primary ? 0x8 : 0x4);
4044
4045 // CALL directly to the runtime
4046 cbuf.set_insts_mark();
4047 emit_opcode(cbuf,0xE8); // Call into runtime
4048 emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
4049
4050 // Carry on here...
4051 %}
4052
// Convert an XMM float to an x87 double: MOVSS to a 4-byte stack slot,
// FLD_S it (pushing the widened value onto the x87 stack), free the slot.
4053 enc_class X2D_encoding( regD dst, regX src ) %{
4054 // Allocate a word
4055 emit_opcode(cbuf,0x83); // SUB ESP,4
4056 emit_opcode(cbuf,0xEC);
4057 emit_d8(cbuf,0x04);
4058
4059 emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], xmm
4060 emit_opcode (cbuf, 0x0F );
4061 emit_opcode (cbuf, 0x11 );
4062 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
4063
4064 emit_opcode(cbuf,0xD9 ); // FLD_S [ESP]
4065 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
4066
4067 emit_opcode(cbuf,0x83); // ADD ESP,4
4068 emit_opcode(cbuf,0xC4);
4069 emit_d8(cbuf,0x04);
4070
4071 // Carry on here...
4072 %}
4073
  // Float absolute value in XMM: AND with the 0x7FFFFFFF... mask from the
  // statically initialized, 16-byte-aligned float_signmask_pool (declared
  // near the top of this file) to clear the sign bit.
  enc_class AbsXF_encoding(regX dst) %{
    address signmask_address=(address)float_signmask_pool;
    // ANDPS $dst,[signconst]   (0F 54 /r)
    emit_opcode(cbuf, 0x0F);
    emit_opcode(cbuf, 0x54);
    emit_rm(cbuf, 0x0, $dst$$reg, 0x5);   // mod=00 rm=101 -> absolute [disp32]
    emit_d32(cbuf, (int)signmask_address);
  %}

  // Double absolute value in XMM: AND with the 0x7FFFFFFFFFFFFFFF mask pair.
  enc_class AbsXD_encoding(regXD dst) %{
    address signmask_address=(address)double_signmask_pool;
    // ANDPD $dst,[signconst]   (66 0F 54 /r)
    emit_opcode(cbuf, 0x66);
    emit_opcode(cbuf, 0x0F);
    emit_opcode(cbuf, 0x54);
    emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
    emit_d32(cbuf, (int)signmask_address);
  %}

  // Float negate in XMM: XOR with 0x80000000... to flip the sign bit.
  enc_class NegXF_encoding(regX dst) %{
    address signmask_address=(address)float_signflip_pool;
    // XORPS $dst,[signconst]   (0F 57 /r)
    emit_opcode(cbuf, 0x0F);
    emit_opcode(cbuf, 0x57);
    emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
    emit_d32(cbuf, (int)signmask_address);
  %}

  // Double negate in XMM: XOR with the 0x8000000000000000 mask pair.
  enc_class NegXD_encoding(regXD dst) %{
    address signmask_address=(address)double_signflip_pool;
    // XORPD $dst,[signconst]   (66 0F 57 /r)
    emit_opcode(cbuf, 0x66);
    emit_opcode(cbuf, 0x0F);
    emit_opcode(cbuf, 0x57);
    emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
    emit_d32(cbuf, (int)signmask_address);
  %}
4111
  // Multiply the FPU top-of-stack by a stack register: FMUL ST,ST(i).
  enc_class FMul_ST_reg( eRegF src1 ) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FMUL ST,$src  /* D8 C8+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src1$$reg);
  %}

  // Add a stack register into the FPU top-of-stack WITHOUT popping:
  // D8 C0+i encodes FADD ST,ST(i) (the popping FADDP form is DE C0+i,
  // emitted by FAddP_reg_ST below).
  enc_class FAdd_ST_reg( eRegF src2 ) %{
    // FADD ST,src2  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
    //could use FADDP src2,fpST /* DE C0+i */
  %}

  // Add top-of-stack into a stack register and pop: FADDP ST(i),ST.
  enc_class FAddP_reg_ST( eRegF src2 ) %{
    // FADDP src2,ST /* DE C0+i */
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
  %}
4131
4159 emit_opcode(cbuf, 0xC0 + $src1$$reg);
4160
4161 // FMULP src2,ST /* DE C8+i */
4162 emit_opcode(cbuf, 0xDE);
4163 emit_opcode(cbuf, 0xC8 + $src2$$reg);
4164 %}
4165
  // Atomically load the volatile long
  // (pre-SSE2 path): a single 64-bit FILD is the only atomic 8-byte read
  // available, so load through the FPU and FISTP into stack slot $dst.
  enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
    emit_opcode(cbuf,0xDF);               // FILD_D $mem   (DF /5)
    int rm_byte_opcode = 0x05;
    int base = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
    store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );   // FISTP_D $dst  (DF /7)
  %}

  // Atomic volatile long load via XMM (SSE2 path): one 64-bit XMM load
  // from $mem, then one 64-bit XMM store into stack slot $dst.
  enc_class enc_loadLX_volatile( memory mem, stackSlotL dst, regXD tmp ) %{
    { // Atomic long load
      // UseXmmLoadAndClearUpper ? movsd $tmp,$mem : movlpd $tmp,$mem
      emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
      int base = $mem$$base;
      int index = $mem$$index;
      int scale = $mem$$scale;
      int displace = $mem$$disp;
      bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
      encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
    }
    { // MOVSD $dst,$tmp ! atomic long store
      emit_opcode(cbuf,0xF2);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x11);
      int base = $dst$$base;
      int index = $dst$$index;
      int scale = $dst$$scale;
      int displace = $dst$$disp;
      bool disp_is_oop = $dst->disp_is_oop(); // disp-as-oop when working with static globals
      encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
    }
  %}

  // Atomic volatile long load into an integer register pair (SSE2 path):
  // 64-bit XMM load, then split $tmp into $dst.lo / $dst.hi with
  // MOVD + PSRLQ + MOVD.
  enc_class enc_loadLX_reg_volatile( memory mem, eRegL dst, regXD tmp ) %{
    { // Atomic long load
      // UseXmmLoadAndClearUpper ? movsd $tmp,$mem : movlpd $tmp,$mem
      emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
      int base = $mem$$base;
      int index = $mem$$index;
      int scale = $mem$$scale;
      int displace = $mem$$disp;
      bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
      encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
    }
    { // MOVD $dst.lo,$tmp   (66 0F 7E /r — low 32 bits to GPR)
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x7E);
      emit_rm(cbuf, 0x3, $tmp$$reg, $dst$$reg);
    }
    { // PSRLQ $tmp,32       (66 0F 73 /2 imm8 — shift high half down)
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x73);
      emit_rm(cbuf, 0x3, 0x02, $tmp$$reg);
      emit_d8(cbuf, 0x20);
    }
    { // MOVD $dst.hi,$tmp
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x7E);
      emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg));
    }
  %}
4238
  // Volatile Store Long. Must be atomic, so move it into
  // the FP TOS and then do a 64-bit FIST. Has to probe the
  // target address before the store (for null-ptr checks)
  // so the memory operand is used twice in the encoding.
  enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
    store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );   // FILD_D $src  (DF /5)
    cbuf.set_insts_mark(); // Mark start of FIST in case $mem has an oop
    emit_opcode(cbuf,0xDF);               // FISTP_D $mem  (DF /7)
    int rm_byte_opcode = 0x07;
    int base = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
  %}

  // Atomic volatile long store via XMM (SSE2 path): load the 64-bit
  // source stack slot into $tmp, then store it to $mem with one MOVSD.
  // Only the MOVSD to $mem needs to be atomic.
  enc_class enc_storeLX_volatile( memory mem, stackSlotL src, regXD tmp) %{
    { // Atomic long load
      // UseXmmLoadAndClearUpper ? movsd $tmp,[$src] : movlpd $tmp,[$src]
      emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
      int base = $src$$base;
      int index = $src$$index;
      int scale = $src$$scale;
      int displace = $src$$disp;
      bool disp_is_oop = $src->disp_is_oop(); // disp-as-oop when working with static globals
      encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
    }
    cbuf.set_insts_mark(); // Mark start of MOVSD in case $mem has an oop
    { // MOVSD $mem,$tmp ! atomic long store
      emit_opcode(cbuf,0xF2);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x11);
      int base = $mem$$base;
      int index = $mem$$index;
      int scale = $mem$$scale;
      int displace = $mem$$disp;
      bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
      encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
    }
  %}

  // Atomic volatile long store from an integer register pair (SSE2 path):
  // pack $src.lo/$src.hi into $tmp with MOVD + MOVD + PUNPCKLDQ, then do
  // one atomic 64-bit MOVSD to $mem.
  enc_class enc_storeLX_reg_volatile( memory mem, eRegL src, regXD tmp, regXD tmp2) %{
    { // MOVD $tmp,$src.lo   (66 0F 6E /r — GPR to low 32 bits of XMM)
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x6E);
      emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
    }
    { // MOVD $tmp2,$src.hi
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x6E);
      emit_rm(cbuf, 0x3, $tmp2$$reg, HIGH_FROM_LOW($src$$reg));
    }
    { // PUNPCKLDQ $tmp,$tmp2  (66 0F 62 /r — interleave low dwords: hi:lo)
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x62);
      emit_rm(cbuf, 0x3, $tmp$$reg, $tmp2$$reg);
    }
    cbuf.set_insts_mark(); // Mark start of MOVSD in case $mem has an oop
    { // MOVSD $mem,$tmp ! atomic long store
      emit_opcode(cbuf,0xF2);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x11);
      int base = $mem$$base;
      int index = $mem$$index;
      int scale = $mem$$scale;
      int displace = $mem$$disp;
      bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
      encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
    }
  %}

  // Safepoint Poll. This polls the safepoint page, and causes an
  // exception if it is not readable. Unfortunately, it kills the condition code
  // in the process
  // We currently use TESTL [spp],EDI
  // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0

  enc_class Safepoint_Poll() %{
    cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
    emit_opcode(cbuf,0x85);               // TEST [polling_page],EDI  (85 /r)
    emit_rm (cbuf, 0x0, 0x7, 0x5);        // reg=EDI, mod=00 rm=101 -> [disp32]
    emit_d32(cbuf, (intptr_t)os::get_polling_page());
  %}
4328 %}
4329
4330
4331 //----------FRAME--------------------------------------------------------------
4332 // Definition of frame structure and management information.
4333 //
4334 // S T A C K L A Y O U T Allocators stack-slot number
4335 // | (to get allocators register number
6860 // then store it down to the stack and reload on the int
6861 // side.
// Atomic volatile long load, pre-SSE2: go through the FPU (FILD/FISTP)
// into a stack slot, the only atomic 64-bit path without SSE2.
instruct loadL_volatile(stackSlotL dst, memory mem) %{
  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(200);
  format %{ "FILD $mem\t# Atomic volatile long load\n\t"
            "FISTp $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

// Atomic volatile long load to a stack slot via an XMM temp (SSE2).
instruct loadLX_volatile(stackSlotL dst, memory mem, regXD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);          // tmp carries the 64-bit value between the two moves
  ins_cost(180);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD $dst,$tmp" %}
  ins_encode(enc_loadLX_volatile(mem, dst, tmp));
  ins_pipe( pipe_slow );
%}

// Atomic volatile long load directly to an integer register pair (SSE2);
// cheapest of the three variants (cost 160).
instruct loadLX_reg_volatile(eRegL dst, memory mem, regXD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD $dst.lo,$tmp\n\t"
            "PSRLQ $tmp,32\n\t"
            "MOVD $dst.hi,$tmp" %}
  ins_encode(enc_loadLX_reg_volatile(mem, dst, tmp));
  ins_pipe( pipe_slow );
%}

// Load Range
// (array length field; plain 32-bit MOV)
instruct loadRange(eRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);              // MOV r32,r/m32
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}
6907
6908
6909 // Load Pointer
6910 instruct loadP(eRegP dst, memory mem) %{
6911 match(Set dst (LoadP mem));
6912
6913 ins_cost(125);
// Load Double
// (FPU path, UseSSE<=1): FLD_D from memory then FSTP into the
// allocated FP register.
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xDD); /* DD /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_D(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Double to XMM
// MOVSD (F2 0F 10) also clears the upper half — preferred when
// UseXmmLoadAndClearUpper says that is cheap on this CPU.
instruct loadXD(regXD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD $dst,$mem" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x10), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}

// MOVLPD (66 0F 12) variant: writes only the low 64 bits, leaving the
// upper half of $dst unchanged.
instruct loadXD_partial(regXD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x12), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadX(regX dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS $dst,$mem" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}

// Load Float
// (FPU path, UseSSE==0): FLD_S then FSTP into the allocated FP register.
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_F(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Aligned Packed Byte to XMM register
instruct loadA8B(regXD dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (Load8B mem));
  ins_cost(125);
  format %{ "MOVQ $dst,$mem\t! packed8B" %}
  ins_encode( movq_ld(dst, mem));
  ins_pipe( pipe_slow );
%}

// Load Aligned Packed Short to XMM register
instruct loadA4S(regXD dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (Load4S mem));
  ins_cost(125);
  format %{ "MOVQ $dst,$mem\t! packed4S" %}
  ins_encode( movq_ld(dst, mem));
  ins_pipe( pipe_slow );
%}

// Load Aligned Packed Char to XMM register
instruct loadA4C(regXD dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (Load4C mem));
  ins_cost(125);
  format %{ "MOVQ $dst,$mem\t! packed4C" %}
  ins_encode( movq_ld(dst, mem));
  ins_pipe( pipe_slow );
%}

// Load Aligned Packed Integer to XMM register
instruct load2IU(regXD dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (Load2I mem));
  ins_cost(125);
  format %{ "MOVQ $dst,$mem\t! packed2I" %}
  ins_encode( movq_ld(dst, mem));
  ins_pipe( pipe_slow );
%}

// Load Aligned Packed Single to XMM
instruct loadA2F(regXD dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (Load2F mem));
  ins_cost(145);
  format %{ "MOVQ $dst,$mem\t! packed2F" %}
  ins_encode( movq_ld(dst, mem));
  ins_pipe( pipe_slow );
%}
7038
// Load Effective Address
// (base + 8-bit offset form; address arithmetic without touching flags)
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);              // LEA r32,m
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}
7049
7050 instruct leaP32(eRegP dst, indOffset32 mem) %{
7051 match(Set dst mem);
7052
7053 ins_cost(110);
7054 format %{ "LEA $dst,$mem" %}
7055 opcode(0x8D);
7241 %}
7242 ins_pipe(fpu_reg_con);
7243 %}
7244
// The instruction usage is guarded by predicate in operand immXD().
// Load a double constant into XMM from the per-method constant table.
instruct loadConXD(regXD dst, immXD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immXD0().
// Materialize +0.0 without a memory load: XORPD reg with itself.
instruct loadConXD0(regXD dst, immXD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD $dst,$dst\t# double 0.0" %}
  ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x57), RegReg(dst,dst));
  ins_pipe( pipe_slow );
%}

// Load Stack Slot
// (spill reload: 32-bit MOV from a stack slot into an int register)
instruct loadSSI(eRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);              // MOV r32,r/m32
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}
7275
7276 instruct loadSSL(eRegL dst, stackSlotL src) %{
7277 match(Set dst src);
7278
7279 ins_cost(200);
7280 format %{ "MOV $dst,$src.lo\n\t"
7281 "MOV $dst+4,$src.hi" %}
7543 predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
7544 match(Set mem (StoreL mem src));
7545 effect( KILL cr );
7546 ins_cost(400);
7547 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
7548 "FILD $src\n\t"
7549 "FISTp $mem\t # 64-bit atomic volatile long store" %}
7550 opcode(0x3B);
7551 ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
7552 ins_pipe( fpu_reg_mem );
7553 %}
7554
// Atomic volatile long store from a stack slot via XMM (SSE2).  The
// leading CMP EAX,$mem (opcode 3B) only probes the address so a null
// pointer faults before the store; it kills the flags, hence KILL cr.
instruct storeLX_volatile(memory mem, stackSlotL src, regXD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD $tmp,$src\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);              // CMP r32,r/m32 — address probe only
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeLX_volatile(mem, src, tmp));
  ins_pipe( pipe_slow );
%}

// Atomic volatile long store from an integer register pair (SSE2):
// pack lo/hi into $tmp via $tmp2, then one atomic MOVSD to memory.
instruct storeLX_reg_volatile(memory mem, eRegL src, regXD tmp2, regXD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD $tmp,$src.lo\n\t"
            "MOVD $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);              // CMP r32,r/m32 — address probe only
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeLX_reg_volatile(mem, src, tmp, tmp2));
  ins_pipe( pipe_slow );
%}

// Store Pointer; for storing unknown oops and raw pointers
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);              // MOV r/m32,r32
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}
7593
7594 // Store Integer Immediate
7595 instruct storeImmI(memory mem, immI src) %{
7596 match(Set mem (StoreI mem src));
7597
7598 ins_cost(150);
7599 format %{ "MOV $mem,$src" %}
7626 ins_pipe( ialu_mem_imm );
7627 %}
7628
// Store Byte Immediate
// C6 /0 ib: MOV m8,imm8.
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src" %}
  opcode(0xC6); /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Aligned Packed Byte XMM register to memory
instruct storeA8B(memory mem, regXD src) %{
  predicate(UseSSE>=1);
  match(Set mem (Store8B mem src));
  ins_cost(145);
  format %{ "MOVQ $mem,$src\t! packed8B" %}
  ins_encode( movq_st(mem, src));
  ins_pipe( pipe_slow );
%}

// Store Aligned Packed Char/Short XMM register to memory
instruct storeA4C(memory mem, regXD src) %{
  predicate(UseSSE>=1);
  match(Set mem (Store4C mem src));
  ins_cost(145);
  format %{ "MOVQ $mem,$src\t! packed4C" %}
  ins_encode( movq_st(mem, src));
  ins_pipe( pipe_slow );
%}

// Store Aligned Packed Integer XMM register to memory
instruct storeA2I(memory mem, regXD src) %{
  predicate(UseSSE>=1);
  match(Set mem (Store2I mem src));
  ins_cost(145);
  format %{ "MOVQ $mem,$src\t! packed2I" %}
  ins_encode( movq_st(mem, src));
  ins_pipe( pipe_slow );
%}

// Store CMS card-mark Immediate
// (same encoding as storeImmB; kept separate so the matcher can treat
// card-mark stores specially)
instruct storeImmCM(memory mem, immI8 src) %{
  match(Set mem (StoreCM mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
  opcode(0xC6); /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}
7680
7681 // Store Double
7682 instruct storeD( memory mem, regDPR1 src) %{
7683 predicate(UseSSE<=1);
7684 match(Set mem (StoreD mem src));
7685
7686 ins_cost(100);
7692
// Store double does rounding on x86
// (FST_D, DD /2, stores without popping; the RoundDouble node is
// absorbed because the 64-bit store itself rounds the 80-bit ST value)
instruct storeD_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D $mem,$src\t# round" %}
  opcode(0xDD); /* DD /2 */
  ins_encode( enc_FP_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM register to memory (double-precision floating points)
// MOVSD instruction
instruct storeXD(memory mem, regXD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD $mem,$src" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x11), RegMem(src, mem));
  ins_pipe( pipe_slow );
%}

// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
instruct storeX(memory mem, regX src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS $mem,$src" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x11), RegMem(src, mem));
  ins_pipe( pipe_slow );
%}

// Store Aligned Packed Single Float XMM register to memory
instruct storeA2F(memory mem, regXD src) %{
  predicate(UseSSE>=1);
  match(Set mem (Store2F mem src));
  ins_cost(145);
  format %{ "MOVQ $mem,$src\t! packed2F" %}
  ins_encode( movq_st(mem, src));
  ins_pipe( pipe_slow );
%}

// Store Float
// (FPU path, UseSSE==0; FST_S is D9 /2)
instruct storeF( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S $mem,$src" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FP_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}
7748
7749 // Store Float does rounding on x86
7750 instruct storeF_rounded( memory mem, regFPR1 src) %{
7751 predicate(UseSSE==0);
7752 match(Set mem (StoreF mem (RoundFloat src)));
7753
8423 match(Set dst (CastII dst));
8424 format %{ "#castII of $dst" %}
8425 ins_encode( /*empty encoding*/ );
8426 ins_cost(0);
8427 ins_pipe( empty );
8428 %}
8429
8430
// Load-locked - same as a regular pointer load when used with compare-swap
instruct loadPLocked(eRegP dst, memory mem) %{
  match(Set dst (LoadPLocked mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# Load ptr. locked" %}
  opcode(0x8B);              // MOV r32,r/m32
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// LoadLong-locked - same as a volatile long load when used with compare-swap
// (pre-SSE2: atomic 64-bit FILD/FISTP through a stack slot)
instruct loadLLocked(stackSlotL dst, load_long_memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadLLocked mem));

  ins_cost(200);
  format %{ "FILD $mem\t# Atomic volatile long load\n\t"
            "FISTp $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

// SSE2 variant: atomic 64-bit XMM load to a stack slot.
instruct loadLX_Locked(stackSlotL dst, load_long_memory mem, regXD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (LoadLLocked mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD $dst,$tmp" %}
  ins_encode(enc_loadLX_volatile(mem, dst, tmp));
  ins_pipe( pipe_slow );
%}

// SSE2 variant: atomic 64-bit XMM load split into an integer register pair.
instruct loadLX_reg_Locked(eRegL dst, load_long_memory mem, regXD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (LoadLLocked mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD $dst.lo,$tmp\n\t"
            "PSRLQ $tmp,32\n\t"
            "MOVD $dst.hi,$tmp" %}
  ins_encode(enc_loadLX_reg_volatile(mem, dst, tmp));
  ins_pipe( pipe_slow );
%}

// Conditional-store of the updated heap-top.
// Used during allocation of the shared heap.
// Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel.
instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
  // EAX is killed if there is contention, but then it's also unused.
  // In the common case of no contention, EAX holds the new oop address.
  format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
  ins_pipe( pipe_cmpxchg );
%}
8489
8490 // Conditional-store of an int value.
8491 // ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel.
8492 instruct storeIConditional( memory mem, eAXRegI oldval, eRegI newval, eFlagsReg cr ) %{
8493 match(Set cr (StoreIConditional mem (Binary oldval newval)));
8494 effect(KILL oldval);
10116 OpcS, OpcP, PopFPU,
10117 CmpF_Result(dst));
10118 ins_pipe( pipe_slow );
10119 %}
10120
// Compare into -1,0,1
// (FPU path: FCOMP, D8 D8+i / D8 /3, then FNSTSW/SAHF-style fixup in
// CmpF_Result; EAX is clobbered by the status-word shuffle)
instruct cmpD_reg(eRegI dst, regD src1, regD src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPD $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
// COMISD (66 0F 2F) leaves unordered as PF=1; the cmpF_P6_fixup turns
// that into a carry so ordinary unsigned branches work.  Kills EAX.
instruct cmpXD_cc(eFlagsRegU cr, regXD dst, regXD src, eAXRegI rax) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD dst src));
  effect(KILL rax);
  ins_cost(125);
  format %{ "COMISD $dst,$src\n"
          "\tJNP exit\n"
          "\tMOV ah,1 // saw a NaN, set CF\n"
          "\tSAHF\n"
          "exit:\tNOP // avoid branch to branch" %}
  opcode(0x66, 0x0F, 0x2F);
  ins_encode(OpcP, OpcS, Opcode(tertiary), RegReg(dst, src), cmpF_P6_fixup);
  ins_pipe( pipe_slow );
%}

// Raw COMISD with no NaN fixup — only for flag users that tolerate the
// unordered encoding (eFlagsRegUCF).
instruct cmpXD_ccCF(eFlagsRegUCF cr, regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD dst src));
  ins_cost(100);
  format %{ "COMISD $dst,$src" %}
  opcode(0x66, 0x0F, 0x2F);
  ins_encode(OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
// (memory operand variant of cmpXD_cc)
instruct cmpXD_ccmem(eFlagsRegU cr, regXD dst, memory src, eAXRegI rax) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD dst (LoadD src)));
  effect(KILL rax);
  ins_cost(145);
  format %{ "COMISD $dst,$src\n"
          "\tJNP exit\n"
          "\tMOV ah,1 // saw a NaN, set CF\n"
          "\tSAHF\n"
          "exit:\tNOP // avoid branch to branch" %}
  opcode(0x66, 0x0F, 0x2F);
  ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(dst, src), cmpF_P6_fixup);
  ins_pipe( pipe_slow );
%}

// Memory-operand COMISD without NaN fixup (see cmpXD_ccCF).
instruct cmpXD_ccmemCF(eFlagsRegUCF cr, regXD dst, memory src) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD dst (LoadD src)));
  ins_cost(100);
  format %{ "COMISD $dst,$src" %}
  opcode(0x66, 0x0F, 0x2F);
  ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(dst, src));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM
// NaN (PF set) is mapped to -1 by jumping to the DEC path.
instruct cmpXD_reg(eRegI dst, regXD src1, regXD src2, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "XOR $dst,$dst\n"
          "\tCOMISD $src1,$src2\n"
          "\tJP,s nan\n"
          "\tJEQ,s exit\n"
          "\tJA,s inc\n"
        "nan:\tDEC $dst\n"
          "\tJMP,s exit\n"
        "inc:\tINC $dst\n"
        "exit:"
              %}
  opcode(0x66, 0x0F, 0x2F);
  ins_encode(Xor_Reg(dst), OpcP, OpcS, Opcode(tertiary), RegReg(src1, src2),
             CmpX_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM and memory
// MOV $dst,0 is used instead of XOR so the COMISD flags survive.
instruct cmpXD_regmem(eRegI dst, regXD src1, memory mem, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (CmpD3 src1 (LoadD mem)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "COMISD $src1,$mem\n"
          "\tMOV $dst,0\t\t# do not blow flags\n"
          "\tJP,s nan\n"
          "\tJEQ,s exit\n"
          "\tJA,s inc\n"
        "nan:\tDEC $dst\n"
          "\tJMP,s exit\n"
        "inc:\tINC $dst\n"
        "exit:"
              %}
  opcode(0x66, 0x0F, 0x2F);
  ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(src1, mem),
             LdImmI(dst,0x0), CmpX_Result(dst));
  ins_pipe( pipe_slow );
%}
10230
10231
// Double subtract on the FPU stack: FLD src, then FSUBP into dst
// (DE E8+i; /5 is the FSUBP reg form).
instruct subD_reg(regD dst, regD src) %{
  predicate (UseSSE <=1);
  match(Set dst (SubD dst src));

  format %{ "FLD $src\n\t"
            "DSUBp $dst,ST" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_cost(150);
  ins_encode( Push_Reg_D(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
10244
10245 instruct subD_reg_round(stackSlotD dst, regD src1, regD src2) %{
10246 predicate (UseSSE <=1);
10247 match(Set dst (RoundDouble (SubD src1 src2)));
10266 "DSUBp $dst,ST" %}
10267 opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD DD /0 */
10268 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10269 OpcP, RegOpc(dst) );
10270 ins_pipe( fpu_reg_mem );
10271 %}
10272
// Double absolute value on the FPU: FABS (D9 E1) operates on ST in place,
// so src and dst are both pinned to FPR1.
instruct absD_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst (AbsD src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);        // emitted as D9 E1 (OpcS first, then OpcP)
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Double absolute value in XMM: clear the sign bit with ANDPD against
// the static double_signmask_pool (see AbsXD_encoding).
instruct absXD_reg( regXD dst ) %{
  predicate(UseSSE>=2);
  match(Set dst (AbsD dst));
  format %{ "ANDPD $dst,[0x7FFFFFFFFFFFFFFF]\t# ABS D by sign masking" %}
  ins_encode( AbsXD_encoding(dst));
  ins_pipe( pipe_slow );
%}

// Double negate on the FPU: FCHS (D9 E0) flips the sign of ST in place.
instruct negD_reg(regDPR1 dst, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set dst (NegD src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);        // emitted as D9 E0
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Double negate in XMM: XORPD with the sign-flip constant pool.
instruct negXD_reg( regXD dst ) %{
  predicate(UseSSE>=2);
  match(Set dst (NegD dst));
  format %{ "XORPD $dst,[0x8000000000000000]\t# CHS D by sign flipping" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister,
             ExternalAddress((address)double_signflip_pool));
  %}
  ins_pipe( pipe_slow );
%}

// Double add on the FPU stack: FLD src then FADDP into dst (DE C0+i).
// size(4): 2 bytes for the FLD + 2 for the FADDP.
instruct addD_reg(regD dst, regD src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst src));
  format %{ "FLD $src\n\t"
            "DADD $dst,ST" %}
  size(4);
  ins_cost(150);
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_D(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Double add of a non-trivial constant with an explicit store-round:
// FLD the constant from the constant table, FADD the register, FSTP to
// the stack slot (the 64-bit store performs the rounding).  The predicate
// excludes 0.0 and 1.0, which other rules handle more cheaply.
instruct addD_reg_imm_round(stackSlotD dst, regD src, immD con) %{
  predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (RoundDouble (AddD src con)));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DADD ST,$src\n\t"
            "FSTP_D $dst\t# D-round" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fadd($src$$reg);
    __ fstp_d(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
10411
// Add two double precision floating point values in xmm
// ADDSD xmm,xmm encoding is F2 0F 58 /r, emitted directly below.
instruct addXD_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (AddD dst src));
  format %{ "ADDSD $dst,$src" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// Add a double constant, loaded from the constant table, in xmm.
instruct addXD_imm(regXD dst, immXD con) %{
  predicate(UseSSE>=2);
  match(Set dst (AddD dst con));
  format %{ "ADDSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ addsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Add a double loaded straight from memory in xmm.
instruct addXD_mem(regXD dst, memory mem) %{
  predicate(UseSSE>=2);
  match(Set dst (AddD dst (LoadD mem)));
  format %{ "ADDSD $dst,$mem" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}
10438
// Sub two double precision floating point values in xmm
// SUBSD xmm,xmm encoding is F2 0F 5C /r, emitted directly below.
instruct subXD_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (SubD dst src));
  format %{ "SUBSD $dst,$src" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// Subtract a double constant, loaded from the constant table, in xmm.
instruct subXD_imm(regXD dst, immXD con) %{
  predicate(UseSSE>=2);
  match(Set dst (SubD dst con));
  format %{ "SUBSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ subsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Subtract a double loaded straight from memory in xmm.
instruct subXD_mem(regXD dst, memory mem) %{
  predicate(UseSSE>=2);
  match(Set dst (SubD dst (LoadD mem)));
  format %{ "SUBSD $dst,$mem" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}
10465
// Mul two double precision floating point values in xmm
// MULSD xmm,xmm encoding is F2 0F 59 /r, emitted directly below.
instruct mulXD_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (MulD dst src));
  format %{ "MULSD $dst,$src" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// Multiply by a double constant, loaded from the constant table, in xmm.
instruct mulXD_imm(regXD dst, immXD con) %{
  predicate(UseSSE>=2);
  match(Set dst (MulD dst con));
  format %{ "MULSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Multiply by a double loaded straight from memory in xmm.
instruct mulXD_mem(regXD dst, memory mem) %{
  predicate(UseSSE>=2);
  match(Set dst (MulD dst (LoadD mem)));
  format %{ "MULSD $dst,$mem" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}
10492
// Div two double precision floating point values in xmm
// DIVSD xmm,xmm encoding is F2 0F 5E /r.  The bytes are emitted directly by
// the Opcode() primitives in ins_encode; the redundant opcode(0xF2,0x0F,0x5E)
// declaration (never consumed — no OpcP/OpcS in the encoding, and absent from
// the sibling addXD/subXD/mulXD rules) has been removed for consistency.
instruct divXD_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (DivD dst src));
  format %{ "DIVSD $dst,$src" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// Divide by a double constant, loaded from the constant table, in xmm.
instruct divXD_imm(regXD dst, immXD con) %{
  predicate(UseSSE>=2);
  match(Set dst (DivD dst con));
  format %{ "DIVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ divsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Divide by a double loaded straight from memory in xmm.
instruct divXD_mem(regXD dst, memory mem) %{
  predicate(UseSSE>=2);
  match(Set dst (DivD dst (LoadD mem)));
  format %{ "DIVSD $dst,$mem" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}
10520
10521
10522 instruct mulD_reg(regD dst, regD src) %{
10523 predicate(UseSSE<=1);
10524 match(Set dst (MulD dst src));
10525 format %{ "FLD $src\n\t"
10526 "DMULp $dst,ST" %}
10527 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
10528 ins_cost(150);
10529 ins_encode( Push_Reg_D(src),
10530 OpcP, RegOpc(dst) );
10531 ins_pipe( fpu_reg_reg );
10532 %}
10533
10534 // Strict FP instruction biases argument before multiply then
10535 // biases result to avoid double rounding of subnormals.
10536 //
10537 // scale arg1 by multiplying arg1 by 2^(-15360)
11129 OpcS, OpcP, PopFPU,
11130 CmpF_Result(dst));
11131 ins_pipe( pipe_slow );
11132 %}
11133
// Compare into -1,0,1
// x87-only (UseSSE==0) float three-way compare producing -1/0/1 in $dst.
// CmpF_Result materializes the result from the FPU status word via EAX,
// hence the KILL of rax and the condition codes.
// NOTE(review): Push_Reg_D is reused here for single-precision stack
// registers — presumably register pushes are width-agnostic on the x87
// stack; confirm against the encoding definition.
instruct cmpF_reg(eRegI dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPF $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}
11147
// float compare and set condition codes in EFLAGS by XMM regs
// COMISS leaves PF set on an unordered (NaN) compare; the cmpF_P6_fixup
// sequence shown in the format forces CF via AH/SAHF so NaN behaves as
// "less than" for the unsigned-flags users.  KILLs rax for that fixup.
instruct cmpX_cc(eFlagsRegU cr, regX dst, regX src, eAXRegI rax) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF dst src));
  effect(KILL rax);
  ins_cost(145);
  format %{ "COMISS $dst,$src\n"
            "\tJNP exit\n"
            "\tMOV ah,1 // saw a NaN, set CF\n"
            "\tSAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0x0F, 0x2F);
  ins_encode(OpcP, OpcS, RegReg(dst, src), cmpF_P6_fixup);
  ins_pipe( pipe_slow );
%}

// Cheaper variant for consumers that only read carry-ish flags
// (eFlagsRegUCF): no NaN fixup is required, so no rax KILL.
instruct cmpX_ccCF(eFlagsRegUCF cr, regX dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF dst src));
  ins_cost(100);
  format %{ "COMISS $dst,$src" %}
  opcode(0x0F, 0x2F);
  ins_encode(OpcP, OpcS, RegReg(dst, src));
  ins_pipe( pipe_slow );
%}
11173
// float compare and set condition codes in EFLAGS by XMM regs
// Memory-operand form of cmpX_cc: COMISS against a loaded float, with the
// same NaN fixup through AH/SAHF (hence the rax KILL).
instruct cmpX_ccmem(eFlagsRegU cr, regX dst, memory src, eAXRegI rax) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF dst (LoadF src)));
  effect(KILL rax);
  ins_cost(165);
  format %{ "COMISS $dst,$src\n"
            "\tJNP exit\n"
            "\tMOV ah,1 // saw a NaN, set CF\n"
            "\tSAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0x0F, 0x2F);
  ins_encode(OpcP, OpcS, RegMem(dst, src), cmpF_P6_fixup);
  ins_pipe( pipe_slow );
%}

// Memory-operand form without the NaN fixup, for eFlagsRegUCF consumers.
instruct cmpX_ccmemCF(eFlagsRegUCF cr, regX dst, memory src) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF dst (LoadF src)));
  ins_cost(100);
  format %{ "COMISS $dst,$src" %}
  opcode(0x0F, 0x2F);
  ins_encode(OpcP, OpcS, RegMem(dst, src));
  ins_pipe( pipe_slow );
%}
11199
// Compare into -1,0,1 in XMM
// Three-way float compare: zero $dst first, then COMISS and branch to
// produce -1 (less or NaN), 0 (equal) or 1 (greater).  NaN takes the
// "nan:" path (DEC), matching Java's CmpF3 semantics of NaN => -1.
instruct cmpX_reg(eRegI dst, regX src1, regX src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "XOR $dst,$dst\n"
            "\tCOMISS $src1,$src2\n"
            "\tJP,s nan\n"
            "\tJEQ,s exit\n"
            "\tJA,s inc\n"
            "nan:\tDEC $dst\n"
            "\tJMP,s exit\n"
            "inc:\tINC $dst\n"
            "exit:"
  %}
  opcode(0x0F, 0x2F);
  ins_encode(Xor_Reg(dst), OpcP, OpcS, RegReg(src1, src2), CmpX_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM and memory
// Same as cmpX_reg but the right operand comes from memory.  $dst is
// cleared with a flag-preserving MOV 0 *after* the compare (see format),
// since XOR would clobber the COMISS result.
instruct cmpX_regmem(eRegI dst, regX src1, memory mem, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 (LoadF mem)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "COMISS $src1,$mem\n"
            "\tMOV $dst,0\t\t# do not blow flags\n"
            "\tJP,s nan\n"
            "\tJEQ,s exit\n"
            "\tJA,s inc\n"
            "nan:\tDEC $dst\n"
            "\tJMP,s exit\n"
            "inc:\tINC $dst\n"
            "exit:"
  %}
  opcode(0x0F, 0x2F);
  ins_encode(OpcP, OpcS, RegMem(src1, mem), LdImmI(dst,0x0), CmpX_Result(dst));
  ins_pipe( pipe_slow );
%}
11241
// Spill to obtain 24-bit precision
// x87 float subtract that stores the result to a stack slot so the
// memory store rounds it to 24-bit (single) precision.
instruct subF24_reg(stackSlotF dst, regF src1, regF src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (SubF src1 src2));

  format %{ "FSUB $dst,$src1 - $src2" %}
  opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
  ins_encode( Push_Reg_F(src1),
              OpcReg_F(src2),
              Pop_Mem_F(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
11254 //
11255 // This instruction does not round to 24-bits
11256 instruct subF_reg(regF dst, regF src) %{
11257 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11258 match(Set dst (SubF dst src));
11278 %}
//
// This instruction does not round to 24-bits
// x87 float add, register-to-register; used only when 24-bit rounding of
// intermediates is not requested.
instruct addF_reg(regF dst, regF src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst src));

  format %{ "FLD $src\n\t"
            "FADDp $dst,ST" %}
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_F(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
11292
// Add two single precision floating point values in xmm
// ADDSS xmm,xmm encoding is F3 0F 58 /r, emitted directly below.
instruct addX_reg(regX dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set dst (AddF dst src));
  format %{ "ADDSS $dst,$src" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// Add a float constant, loaded from the constant table, in xmm.
instruct addX_imm(regX dst, immXF con) %{
  predicate(UseSSE>=1);
  match(Set dst (AddF dst con));
  format %{ "ADDSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ addss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Add a float loaded straight from memory in xmm.
instruct addX_mem(regX dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (AddF dst (LoadF mem)));
  format %{ "ADDSS $dst,$mem" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), RegMem(dst, mem));
  ins_pipe( pipe_slow );
%}

// Subtract two single precision floating point values in xmm
// SUBSS xmm,xmm encoding is F3 0F 5C /r.
instruct subX_reg(regX dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set dst (SubF dst src));
  format %{ "SUBSS $dst,$src" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// Subtract a float constant, loaded from the constant table, in xmm.
instruct subX_imm(regX dst, immXF con) %{
  predicate(UseSSE>=1);
  match(Set dst (SubF dst con));
  format %{ "SUBSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ subss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Subtract a float loaded straight from memory in xmm.
instruct subX_mem(regX dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (SubF dst (LoadF mem)));
  format %{ "SUBSS $dst,$mem" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}
11346
// Multiply two single precision floating point values in xmm
// MULSS xmm,xmm encoding is F3 0F 59 /r, emitted directly below.
instruct mulX_reg(regX dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set dst (MulF dst src));
  format %{ "MULSS $dst,$src" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// Multiply by a float constant, loaded from the constant table, in xmm.
instruct mulX_imm(regX dst, immXF con) %{
  predicate(UseSSE>=1);
  match(Set dst (MulF dst con));
  format %{ "MULSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ mulss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Multiply by a float loaded straight from memory in xmm.
instruct mulX_mem(regX dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (MulF dst (LoadF mem)));
  format %{ "MULSS $dst,$mem" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}

// Divide two single precision floating point values in xmm
// DIVSS xmm,xmm encoding is F3 0F 5E /r.
instruct divX_reg(regX dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set dst (DivF dst src));
  format %{ "DIVSS $dst,$src" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// Divide by a float constant, loaded from the constant table, in xmm.
instruct divX_imm(regX dst, immXF con) %{
  predicate(UseSSE>=1);
  match(Set dst (DivF dst con));
  format %{ "DIVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ divss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Divide by a float loaded straight from memory in xmm.
instruct divX_mem(regX dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (DivF dst (LoadF mem)));
  format %{ "DIVSS $dst,$mem" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}
11400
// Get the square root of a single precision floating point values in xmm
// Matches the ideal pattern ConvD2F(SqrtD(ConvF2D x)): a single-precision
// SQRTSS (F3 0F 51) gives the same result as widen/sqrt/narrow.
instruct sqrtX_reg(regX dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
  format %{ "SQRTSS $dst,$src" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x51), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// Same pattern with the float operand loaded from memory.
instruct sqrtX_mem(regX dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF mem)))));
  format %{ "SQRTSS $dst,$mem" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x51), RegMem(dst, mem));
  ins_pipe( pipe_slow );
%}

// Get the square root of a double precision floating point values in xmm
// SQRTSD xmm,xmm encoding is F2 0F 51 /r.
instruct sqrtXD_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD src));
  format %{ "SQRTSD $dst,$src" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x51), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// Double square root with the operand loaded from memory.
instruct sqrtXD_mem(regXD dst, memory mem) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD (LoadD mem)));
  format %{ "SQRTSD $dst,$mem" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x51), RegMem(dst, mem));
  ins_pipe( pipe_slow );
%}
11434
// x87 float absolute value.  Operates in place on ST(0) (regFPR1);
// OpcS then OpcP emits D9 E1 = FABS.
instruct absF_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (AbsF src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// SSE float absolute value: ANDPS with the 0x7FFFFFFF sign mask clears
// the sign bit (mask supplied by AbsXF_encoding).
instruct absX_reg(regX dst ) %{
  predicate(UseSSE>=1);
  match(Set dst (AbsF dst));
  format %{ "ANDPS $dst,[0x7FFFFFFF]\t# ABS F by sign masking" %}
  ins_encode( AbsXF_encoding(dst));
  ins_pipe( pipe_slow );
%}

// x87 float negate.  Operates in place on ST(0); emits D9 E0 = FCHS.
instruct negF_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (NegF src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// SSE float negate: XORPS with the 0x80000000 sign-flip mask.
instruct negX_reg( regX dst ) %{
  predicate(UseSSE>=1);
  match(Set dst (NegF dst));
  format %{ "XORPS $dst,[0x80000000]\t# CHS F by sign flipping" %}
  ins_encode( NegXF_encoding(dst));
  ins_pipe( pipe_slow );
%}
11470
// Cisc-alternate to addF_reg
// Spill to obtain 24-bit precision
// x87 float add with one memory operand; the final store to the stack
// slot rounds the result to 24-bit (single) precision.
instruct addF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FLD $src2\n\t"
            "FADD ST,$src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_F(src1),
              Pop_Mem_F(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
11486 //
11487 // Cisc-alternate to addF_reg
11853
// Force rounding to 24-bit precision and 6-bit exponent
// x87-only D2F: expand into a round-to-memory store (roundFloat_mem_reg).
instruct convD2F_reg(stackSlotF dst, regD src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));
  format %{ "FST_S $dst,$src\t# F-round" %}
  expand %{
    roundFloat_mem_reg(dst,src);
  %}
%}

// Force rounding to 24-bit precision and 6-bit exponent
// UseSSE==1 D2F: the double lives on the x87 stack but the float result
// must land in an xmm register, so bounce it through the stack.
instruct convD2X_reg(regX dst, regD src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "FST_S [ESP],$src\t# F-round\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode( D2X_encoding(dst, src) );
  ins_pipe( pipe_slow );
%}

// Force rounding double precision to single precision
// Pure-SSE2 D2F: CVTSD2SS (F2 0F 5A) rounds in one instruction.
instruct convXD2X_reg(regX dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));
  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  opcode(0xF2, 0x0F, 0x5A);
  ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// x87-only F2D: widening on the FP stack is a register move/pop.
// NOTE(review): the format text says "FST_S" although this converts float
// to double — looks like it should read FST_D; display-only, confirm.
instruct convF2D_reg_reg(regD dst, regF src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2D src));
  format %{ "FST_S $dst,$src\t# D-round" %}
  ins_encode( Pop_Reg_Reg_D(dst, src));
  ins_pipe( fpu_reg_reg );
%}

// UseSSE==1 F2D into a stack slot: expand into a round-to-memory store.
instruct convF2D_reg(stackSlotD dst, regF src) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  format %{ "FST_D $dst,$src\t# D-round" %}
  expand %{
    roundDouble_mem_reg(dst,src);
  %}
%}

// UseSSE==1 F2D: the float is in xmm but the double result must end up
// on the x87 stack, so bounce the value through memory.
instruct convX2D_reg(regD dst, regX src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "MOVSS [ESP] $src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "FSTP $dst\t# D-round" %}
  ins_encode( X2D_encoding(dst, src), Pop_Reg_D(dst));
  ins_pipe( pipe_slow );
%}

// Pure-SSE2 F2D: CVTSS2SD (F3 0F 5A).
instruct convX2XD_reg(regXD dst, regX src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvF2D src));
  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  opcode(0xF3, 0x0F, 0x5A);
  ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}
11925
// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
// x87 path: switch to truncating rounding, store as int, restore the mode,
// then compare against 0x80000000 — the sentinel the FPU stores on overflow
// or NaN — and call the d2i_wrapper slow path when it is seen.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert double to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD_D $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_D(src), D2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
// SSE2 path: CVTTSD2SI (F2 0F 2C) truncates directly; 0x80000000 in the
// result signals overflow/NaN and routes to the d2i_wrapper slow path.
instruct convXD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regXD src, eFlagsReg cr ) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 8\n\t"
            "MOVSD [ESP], $src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP, 8\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  opcode(0x1); // double-precision conversion
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x2C), FX2I_encoding(src,dst));
  ins_pipe( pipe_slow );
%}
11964
11965 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
11966 predicate(UseSSE<=1);
11967 match(Set dst (ConvD2L src));
11968 effect( KILL cr );
11969 format %{ "FLD $src\t# Convert double to long\n\t"
11970 "FLDCW trunc mode\n\t"
11971 "SUB ESP,8\n\t"
11972 "FISTp [ESP + #0]\n\t"
11973 "FLDCW std/24-bit mode\n\t"
11974 "POP EAX\n\t"
11975 "POP EDX\n\t"
11976 "CMP EDX,0x80000000\n\t"
11977 "JNE,s fast\n\t"
11978 "TEST EAX,EAX\n\t"
11979 "JNE,s fast\n\t"
11980 "FLD $src\n\t"
11981 "CALL d2l_wrapper\n"
// XMM lacks a float/double->long conversion, so use the old FPU stack.
// Bounce the xmm double through memory onto the x87 stack, FIST with
// truncating rounding, then check EDX:EAX for the 0x80000000:0 sentinel
// (overflow/NaN) and call d2l_wrapper on that slow path.
instruct convXD2L_reg_reg( eADXRegL dst, regXD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert double to long\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,8\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode( XD2L_encoding(src) );
  ins_pipe( pipe_slow );
%}
12012
12013 // Convert a double to an int. Java semantics require we do complex
12014 // manglations in the corner cases. So we set the rounding mode to
12015 // 'zero', store the darned double down as an int, and reset the
12016 // rounding mode to 'nearest'. The hardware stores a flag value down
12017 // if we would overflow or converted a NAN; we check for this and
12018 // and go the slow path if needed.
12019 instruct convF2I_reg_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
12020 predicate(UseSSE==0);
12021 match(Set dst (ConvF2I src));
12022 effect( KILL tmp, KILL cr );
12023 format %{ "FLD $src\t# Convert float to int \n\t"
12024 "FLDCW trunc mode\n\t"
12025 "SUB ESP,4\n\t"
12026 "FISTp [ESP + #0]\n\t"
12027 "FLDCW std/24-bit mode\n\t"
12028 "POP EAX\n\t"
12029 "CMP EAX,0x80000000\n\t"
12033 "fast:" %}
12034 // D2I_encoding works for F2I
12035 ins_encode( Push_Reg_F(src), D2I_encoding(src) );
12036 ins_pipe( pipe_slow );
12037 %}
12038
// Convert a float in xmm to an int reg.
// CVTTSS2SI (F3 0F 2C) truncates directly; a 0x80000000 result signals
// overflow/NaN and routes to the d2i_wrapper slow path with the original
// value reloaded onto the x87 stack.
instruct convX2I_reg(eAXRegI dst, eDXRegI tmp, regX src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 4\n\t"
            "MOVSS [ESP], $src\n\t"
            "FLD [ESP]\n\t"
            "ADD ESP, 4\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  opcode(0x0); // single-precision conversion
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x2C), FX2I_encoding(src,dst));
  ins_pipe( pipe_slow );
%}
12057
12058 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
12059 predicate(UseSSE==0);
12060 match(Set dst (ConvF2L src));
12061 effect( KILL cr );
12062 format %{ "FLD $src\t# Convert float to long\n\t"
12063 "FLDCW trunc mode\n\t"
12064 "SUB ESP,8\n\t"
12065 "FISTp [ESP + #0]\n\t"
12066 "FLDCW std/24-bit mode\n\t"
12067 "POP EAX\n\t"
12068 "POP EDX\n\t"
12069 "CMP EDX,0x80000000\n\t"
12070 "JNE,s fast\n\t"
12071 "TEST EAX,EAX\n\t"
12072 "JNE,s fast\n\t"
12073 "FLD $src\n\t"
12074 "CALL d2l_wrapper\n"
12084 match(Set dst (ConvF2L src));
12085 effect( KILL cr );
12086 format %{ "SUB ESP,8\t# Convert float to long\n\t"
12087 "MOVSS [ESP],$src\n\t"
12088 "FLD_S [ESP]\n\t"
12089 "FLDCW trunc mode\n\t"
12090 "FISTp [ESP + #0]\n\t"
12091 "FLDCW std/24-bit mode\n\t"
12092 "POP EAX\n\t"
12093 "POP EDX\n\t"
12094 "CMP EDX,0x80000000\n\t"
12095 "JNE,s fast\n\t"
12096 "TEST EAX,EAX\n\t"
12097 "JNE,s fast\n\t"
12098 "SUB ESP,4\t# Convert float to long\n\t"
12099 "MOVSS [ESP],$src\n\t"
12100 "FLD_S [ESP]\n\t"
12101 "ADD ESP,4\n\t"
12102 "CALL d2l_wrapper\n"
12103 "fast:" %}
12104 ins_encode( X2L_encoding(src) );
12105 ins_pipe( pipe_slow );
12106 %}
12107
// x87 I2D: FILD the int from its stack slot, FSTP into the FP register.
instruct convI2D_reg(regD dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_D(dst));
  ins_pipe( fpu_reg_mem );
%}

// SSE2 I2D from a GP register: CVTSI2SD (F2 0F 2A); used when the
// xmm-based variant below is disabled by !UseXmmI2D.
instruct convI2XD_reg(regXD dst, eRegI src) %{
  predicate( UseSSE>=2 && !UseXmmI2D );
  match(Set dst (ConvI2D src));
  format %{ "CVTSI2SD $dst,$src" %}
  opcode(0xF2, 0x0F, 0x2A);
  ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// SSE2 I2D with the int loaded straight from memory.
instruct convI2XD_mem(regXD dst, memory mem) %{
  predicate( UseSSE>=2 );
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "CVTSI2SD $dst,$mem" %}
  opcode(0xF2, 0x0F, 0x2A);
  ins_encode( OpcP, OpcS, Opcode(tertiary), RegMem(dst, mem));
  ins_pipe( pipe_slow );
%}

// SSE2 I2D via MOVD + CVTDQ2PD, selected by UseXmmI2D (presumably faster
// than CVTSI2SD on some hardware — chosen by flag, not proven here).
instruct convXI2XD_reg(regXD dst, eRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2D );
  match(Set dst (ConvI2D src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PD $dst,$dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}
12149
12150 instruct convI2D_mem(regD dst, memory mem) %{
12151 predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
12152 match(Set dst (ConvI2D (LoadI mem)));
12208 ins_pipe( fpu_reg_mem );
12209 %}
12210
// This instruction does not round to 24-bits
// x87 I2F with the int loaded from memory: FILD then FSTP to the register.
instruct convI2F_mem(regF dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_F(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert an int to a float in xmm; no rounding step needed.
// CVTSI2SS (F3 0F 2A); used unless the MOVD+CVTDQ2PS variant is enabled.
instruct convI2X_reg(regX dst, eRegI src) %{
  predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}

  opcode(0xF3, 0x0F, 0x2A); /* F3 0F 2A /r */
  ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// SSE2 I2F via MOVD + CVTDQ2PS, selected by the UseXmmI2F flag.
instruct convXI2X_reg(regX dst, eRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}
12247
12248 instruct convI2L_reg( eRegL dst, eRegI src, eFlagsReg cr) %{
12249 match(Set dst (ConvI2L src));
12250 effect(KILL cr);
12334 "FSTP_S $dst\t# F-round" %}
12335 opcode(0xDF, 0x5); /* DF /5 */
12336 ins_encode(convert_long_double(src), Pop_Mem_F(dst));
12337 ins_pipe( pipe_slow );
12338 %}
12339
// Long-to-int truncation: just copy the low 32-bit half of the long pair.
instruct convL2I_reg( eRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}
12347
12348
// Raw float-bits to int when the float is already in a stack slot:
// a plain 32-bit integer load (MOV r32, m32).
instruct MoveF2I_stack_reg(eRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// x87 float register to int stack slot: store the raw single bits.
instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_F(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// xmm float to int stack slot: MOVSS store (F3 0F 11).
instruct MoveF2I_reg_stack_sse(stackSlotI dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x11), RegMem(src, dst));
  ins_pipe( pipe_slow );
%}

// xmm float straight to a GP register with MOVD (cheapest form, SSE2).
instruct MoveF2I_reg_reg_sse(eRegI dst, regX src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode( MovX2I_reg(dst, src));
  ins_pipe( pipe_slow );
%}
12390
// Raw int bits to a float stack slot: a plain 32-bit integer store.
instruct MoveI2F_reg_stack(stackSlotF dst, eRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}


// Int stack slot to x87 float register: FLD the raw bits, FSTP to $dst.
instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FLD_S $src\n\t"
            "FSTP $dst\t# MoveI2F_stack_reg" %}
  opcode(0xD9); /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_F(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Int stack slot to xmm: MOVSS load (F3 0F 10).
instruct MoveI2F_stack_reg_sse(regX dst, stackSlotI src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), RegMem(dst,src));
  ins_pipe( pipe_slow );
%}

// GP register straight to xmm with MOVD (cheapest form, SSE2).
instruct MoveI2F_reg_reg_sse(regX dst, eRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
  ins_encode( MovI2X_reg(dst, src) );
  ins_pipe( pipe_slow );
%}
12438
// Raw double bits (in a stack slot) to a long register pair:
// two 32-bit integer loads for the lo and hi halves.
instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
  ins_pipe( ialu_mem_long_reg );
%}

// x87 double register to a long stack slot: store the raw 64 bits.
instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
  ins_encode( Pop_Mem_Reg_D(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// xmm double to a long stack slot: MOVSD store (F2 0F 11).
instruct MoveD2L_reg_stack_sse(stackSlotL dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(95);

  format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x11), RegMem(src,dst));
  ins_pipe( pipe_slow );
%}

// xmm double straight to a long register pair: MOVD the low half, swap
// halves into $tmp with PSHUFLW, MOVD the high half.  TEMP tmp because
// the shuffle scratches it.
instruct MoveD2L_reg_reg_sse(eRegL dst, regXD src, regXD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst.lo,$src\n\t"
            "PSHUFLW $tmp,$src,0x4E\n\t"
            "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
  ins_encode( MovXD2L_reg(dst, src, tmp) );
  ins_pipe( pipe_slow );
%}
12484
12485 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
12486 match(Set dst (MoveL2D src));
12487 effect(DEF dst, USE src);
12488
12489 ins_cost(200);
12490 format %{ "MOV $dst,$src.lo\n\t"
12491 "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
12492 opcode(0x89, 0x89);
12493 ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
12494 ins_pipe( ialu_mem_long_reg );
12495 %}
12496
12497
12498 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
12499 predicate(UseSSE<=1);
12500 match(Set dst (MoveL2D src));
12501 effect(DEF dst, USE src);
12502 ins_cost(125);
12503
12504 format %{ "FLD_D $src\n\t"
12505 "FSTP $dst\t# MoveL2D_stack_reg" %}
12506 opcode(0xDD); /* DD /0, FLD m64real */
12507 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
12508 Pop_Reg_D(dst) );
12509 ins_pipe( fpu_reg_mem );
12510 %}
12511
12512
12513 instruct MoveL2D_stack_reg_sse(regXD dst, stackSlotL src) %{
12514 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
12515 match(Set dst (MoveL2D src));
12516 effect(DEF dst, USE src);
12517
12518 ins_cost(95);
12519 format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %}
12520 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x10), RegMem(dst,src));
12521 ins_pipe( pipe_slow );
12522 %}
12523
// SSE2 path when UseXmmLoadAndClearUpper is off: MOVLPD (66 0F 12) writes
// only the low qword of $dst, leaving the upper half untouched (the partial
// register update this rule is named for).
12524 instruct MoveL2D_stack_reg_sse_partial(regXD dst, stackSlotL src) %{
12525 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
12526 match(Set dst (MoveL2D src));
12527 effect(DEF dst, USE src);
12528
12529 ins_cost(95);
// Fix: the disassembly tag previously said "MoveL2D_stack_reg_sse", which is
// the sibling rule above; label this rule with its own name.
12530 format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse_partial" %}
12531 ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x12), RegMem(dst,src));
12532 ins_pipe( pipe_slow );
12533 %}
12534
// Raw-bits move of a long register pair into an XMM double: MOVD each half,
// then PUNPCKLDQ interleaves the low dwords so $dst = hi:lo.  $dst is TEMP
// because it is built up incrementally.
12535 instruct MoveL2D_reg_reg_sse(regXD dst, eRegL src, regXD tmp) %{
12536 predicate(UseSSE>=2);
12537 match(Set dst (MoveL2D src));
12538 effect(TEMP dst, USE src, TEMP tmp);
12539 ins_cost(85);
12540 format %{ "MOVD $dst,$src.lo\n\t"
12541 "MOVD $tmp,$src.hi\n\t"
12542 "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
12543 ins_encode( MovL2XD_reg(dst, src, tmp) );
12544 ins_pipe( pipe_slow );
12545 %}
12546
12547 // Replicate scalar to packed byte (1 byte) values in xmm
// PUNPCKLBW duplicates each low byte into a word; PSHUFLW 0x00 then
// broadcasts word 0 across the low qword, yielding 8 copies of the byte.
12548 instruct Repl8B_reg(regXD dst, regXD src) %{
12549 predicate(UseSSE>=2);
12550 match(Set dst (Replicate8B src));
12551 format %{ "MOVDQA $dst,$src\n\t"
12552 "PUNPCKLBW $dst,$dst\n\t"
12553 "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
12554 ins_encode( pshufd_8x8(dst, src));
12555 ins_pipe( pipe_slow );
12556 %}
12557
12558 // Replicate scalar to packed byte (1 byte) values in xmm
// Same as Repl8B_reg but the scalar starts in a GPR: MOVD transfers it to
// the XMM register before the unpack/shuffle broadcast.
12559 instruct Repl8B_eRegI(regXD dst, eRegI src) %{
12560 predicate(UseSSE>=2);
12561 match(Set dst (Replicate8B src));
12562 format %{ "MOVD $dst,$src\n\t"
12563 "PUNPCKLBW $dst,$dst\n\t"
12564 "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
12565 ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst));
12566 ins_pipe( pipe_slow );
12567 %}
12568
12569 // Replicate scalar zero to packed byte (1 byte) values in xmm
// A vector of zero bytes is just an all-zero register: PXOR dst,dst.
12570 instruct Repl8B_immI0(regXD dst, immI0 zero) %{
12571 predicate(UseSSE>=2);
12572 match(Set dst (Replicate8B zero));
12573 format %{ "PXOR $dst,$dst\t! replicate8B" %}
12574 ins_encode( pxor(dst, dst));
12575 ins_pipe( fpu_reg_reg );
12576 %}
12577
12578 // Replicate scalar to packed short (2 byte) values in xmm
// PSHUFLW immediate 0x00 broadcasts word 0 of $src into all 4 low words.
12579 instruct Repl4S_reg(regXD dst, regXD src) %{
12580 predicate(UseSSE>=2);
12581 match(Set dst (Replicate4S src));
12582 format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
12583 ins_encode( pshufd_4x16(dst, src));
12584 ins_pipe( fpu_reg_reg );
12585 %}
12586
12587 // Replicate scalar to packed short (2 byte) values in xmm
// GPR source variant: MOVD into the XMM register, then broadcast word 0.
12588 instruct Repl4S_eRegI(regXD dst, eRegI src) %{
12589 predicate(UseSSE>=2);
12590 match(Set dst (Replicate4S src));
12591 format %{ "MOVD $dst,$src\n\t"
12592 "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
12593 ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
12594 ins_pipe( fpu_reg_reg );
12595 %}
12596
12597 // Replicate scalar zero to packed short (2 byte) values in xmm
// All-zero vector via PXOR dst,dst.
12598 instruct Repl4S_immI0(regXD dst, immI0 zero) %{
12599 predicate(UseSSE>=2);
12600 match(Set dst (Replicate4S zero));
12601 format %{ "PXOR $dst,$dst\t! replicate4S" %}
12602 ins_encode( pxor(dst, dst));
12603 ins_pipe( fpu_reg_reg );
12604 %}
12605
12606 // Replicate scalar to packed char (2 byte) values in xmm
// Identical machinery to Repl4S_reg; chars are 16-bit on this platform.
12607 instruct Repl4C_reg(regXD dst, regXD src) %{
12608 predicate(UseSSE>=2);
12609 match(Set dst (Replicate4C src));
12610 format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
12611 ins_encode( pshufd_4x16(dst, src));
12612 ins_pipe( fpu_reg_reg );
12613 %}
12614
12615 // Replicate scalar to packed char (2 byte) values in xmm
// GPR source variant: MOVD then broadcast word 0 via PSHUFLW.
12616 instruct Repl4C_eRegI(regXD dst, eRegI src) %{
12617 predicate(UseSSE>=2);
12618 match(Set dst (Replicate4C src));
12619 format %{ "MOVD $dst,$src\n\t"
12620 "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
12621 ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
12622 ins_pipe( fpu_reg_reg );
12623 %}
12624
12625 // Replicate scalar zero to packed char (2 byte) values in xmm
// All-zero vector via PXOR dst,dst.
12626 instruct Repl4C_immI0(regXD dst, immI0 zero) %{
12627 predicate(UseSSE>=2);
12628 match(Set dst (Replicate4C zero));
12629 format %{ "PXOR $dst,$dst\t! replicate4C" %}
12630 ins_encode( pxor(dst, dst));
12631 ins_pipe( fpu_reg_reg );
12632 %}
12633
12634 // Replicate scalar to packed integer (4 byte) values in xmm
// PSHUFD 0x00 broadcasts dword 0 of $src to all four dwords of $dst.
12635 instruct Repl2I_reg(regXD dst, regXD src) %{
12636 predicate(UseSSE>=2);
12637 match(Set dst (Replicate2I src));
12638 format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
12639 ins_encode( pshufd(dst, src, 0x00));
12640 ins_pipe( fpu_reg_reg );
12641 %}
12642
12643 // Replicate scalar to packed integer (4 byte) values in xmm
// GPR source variant: MOVD into XMM, then PSHUFD 0x00 broadcast.
12644 instruct Repl2I_eRegI(regXD dst, eRegI src) %{
12645 predicate(UseSSE>=2);
12646 match(Set dst (Replicate2I src));
12647 format %{ "MOVD $dst,$src\n\t"
12648 "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
12649 ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00));
12650 ins_pipe( fpu_reg_reg );
12651 %}
12652
12653 // Replicate scalar zero to packed integer (4 byte) values in xmm
// All-zero vector via PXOR dst,dst.
12654 instruct Repl2I_immI0(regXD dst, immI0 zero) %{
12655 predicate(UseSSE>=2);
12656 match(Set dst (Replicate2I zero));
12657 format %{ "PXOR $dst,$dst\t! replicate2I" %}
12658 ins_encode( pxor(dst, dst));
12659 ins_pipe( fpu_reg_reg );
12660 %}
12661
12662 // Replicate scalar to packed single precision floating point values in xmm
// PSHUFD 0xe0 selects dwords 0,0,2,3 — duplicating the scalar into the two
// low float lanes while leaving the upper lanes as-is.
12663 instruct Repl2F_reg(regXD dst, regXD src) %{
12664 predicate(UseSSE>=2);
12665 match(Set dst (Replicate2F src));
12666 format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
12667 ins_encode( pshufd(dst, src, 0xe0));
12668 ins_pipe( fpu_reg_reg );
12669 %}
12670
12671 // Replicate scalar to packed single precision floating point values in xmm
// Same as Repl2F_reg but matching a single-precision (regX) source operand.
12672 instruct Repl2F_regX(regXD dst, regX src) %{
12673 predicate(UseSSE>=2);
12674 match(Set dst (Replicate2F src));
12675 format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
12676 ins_encode( pshufd(dst, src, 0xe0));
12677 ins_pipe( fpu_reg_reg );
12678 %}
12679
12680 // Replicate scalar zero to packed single precision floating point values in xmm
// All-zero vector via PXOR dst,dst.
12681 instruct Repl2F_immXF0(regXD dst, immXF0 zero) %{
12682 predicate(UseSSE>=2);
12683 match(Set dst (Replicate2F zero));
12684 format %{ "PXOR $dst,$dst\t! replicate2F" %}
12685 ins_encode( pxor(dst, dst));
12686 ins_pipe( fpu_reg_reg );
12687 %}
12688
12689 // =======================================================================
12690 // fast clearing of an array
// The count arrives in ECX in 8-byte units; SHL ECX,1 doubles it to the
// number of 4-byte stores for REP STOSD (F3 AB).  EAX supplies the zero
// store value.  ECX/EDI/EAX and flags are all clobbered (effect clause).
12691 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
12692 match(Set dummy (ClearArray cnt base));
12693 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
12694 format %{ "SHL ECX,1\t# Convert doublewords to words\n\t"
12695 "XOR EAX,EAX\n\t"
12696 "REP STOS\t# store EAX into [EDI++] while ECX--" %}
12697 opcode(0,0x4);
12698 ins_encode( Opcode(0xD1), RegOpc(ECX),
12699 OpcRegReg(0x33,EAX,EAX),
12700 Opcode(0xF3), Opcode(0xAB) );
12701 ins_pipe( pipe_slow );
12702 %}
12703
12704 instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
12705 eAXRegI result, regXD tmp1, eFlagsReg cr) %{
|
264 return operand;
265 }
266
267 // Buffer for 128-bits masks used by SSE instructions.
// One extra 128-bit slot is reserved so double_quadword can hand back
// 16-byte-aligned pointers regardless of the array's own alignment.
268 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
269
270 // Static initialization during VM startup.
// Masks for absolute value (clear sign bit) and sign flip (xor sign bit)
// of float and double lanes respectively.
271 static jlong *float_signmask_pool = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
272 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
273 static jlong *float_signflip_pool = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
274 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
275
276 // Offset hacking within calls.
// Extra bytes emitted ahead of a call when this compile runs in 24-bit FP
// precision mode: a 6-byte FLDCW (presumably switching precision around the
// call — confirm against the call encoders).
277 static int pre_call_FPU_size() {
278 if (Compile::current()->in_24_bit_fp_mode())
279 return 6; // fldcw
280 return 0;
281 }
282
283 static int preserve_SP_size() {
// Size in bytes of the SP-preserving reg-reg move emitted before
// method-handle invokes: opcode + ModRM on 32-bit x86.
284 return 2; // op, rm(reg/reg)
285 }
286
287 // !!!!! Special hack to get all type of calls to specify the byte offset
288 // from the start of the call to the point where the return address
289 // will point.
290 int MachCallStaticJavaNode::ret_addr_offset() {
291 int offset = 5 + pre_call_FPU_size(); // 5 bytes from start of call to where return address points
// Method-handle invokes emit an extra SP-preserving move before the call.
292 if (_method_handle_invoke)
293 offset += preserve_SP_size();
294 return offset;
295 }
296
297 int MachCallDynamicJavaNode::ret_addr_offset() {
// 10 bytes precede the return address (presumably the inline-cache MOV plus
// the 5-byte CALL — confirm against the Java_Dynamic_Call encoder).
298 return 10 + pre_call_FPU_size(); // 10 bytes from start of call to where return address points
299 }
300
// Filled in when the FFree_Float_Stack_All sequence is first emitted;
// -1 means "not emitted yet" (asserted by MachCallRuntimeNode below).
301 static int sizeof_FFree_Float_Stack_All = -1;
302
303 int MachCallRuntimeNode::ret_addr_offset() {
304 assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
478 if ( displace_is_oop ) {
479 emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
480 } else {
481 emit_d32 (cbuf, displace);
482 }
483 }
484 }
485 }
486 }
487
488
489 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
490 if( dst_encoding == src_encoding ) {
491 // reg-reg copy, use an empty encoding
492 } else {
493 emit_opcode( cbuf, 0x8B );
494 emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
495 }
496 }
497
// Post-compare fixup after COMISS/UCOMISS: when the compare was unordered
// (PF set, i.e. a NaN operand), patch EFLAGS on the stack so the result
// reads as 'less than'.  Ordered compares skip the fixup entirely.
498 void emit_cmpfp_fixup(MacroAssembler& _masm) {
499 Label exit;
500 __ jccb(Assembler::noParity, exit);
501 __ pushf();
502 //
503 // comiss/ucomiss instructions set ZF,PF,CF flags and
504 // zero OF,AF,SF for NaN values.
505 // Fixup flags by zeroing ZF,PF so that compare of NaN
506 // values returns 'less than' result (CF is set).
507 // Leave the rest of flags unchanged.
508 //
509 // 7 6 5 4 3 2 1 0
510 // |S|Z|r|A|r|P|r|C| (r - reserved bit)
511 // 0 0 1 0 1 0 1 1 (0x2B)
512 //
513 __ andl(Address(rsp, 0), 0xffffff2b);
514 __ popf();
515 __ bind(exit);
516 }
517
// Materialize a three-way FP compare result in dst from the flags left by a
// preceding COMISS/UCOMISS: -1 for 'less' or unordered (parity), 0 for
// equal, 1 for greater.  SETB/MOVZBL produce the 0/1 in the fall-through case.
518 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
519 Label done;
520 __ movl(dst, -1);
521 __ jcc(Assembler::parity, done);
522 __ jcc(Assembler::below, done);
523 __ setb(Assembler::notEqual, dst);
524 __ movzbl(dst, dst);
525 __ bind(done);
526 }
527
528
529 //=============================================================================
// The constant table is addressed absolutely on x86_32 (see
// calculate_table_base_offset below), so the base node defines no register.
530 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
531
// Constants are reached by absolute address on x86_32, so the table base
// needs no relative offset.
532 int Compile::ConstantTable::calculate_table_base_offset() const {
533 return 0; // absolute addressing, no offset
534 }
535
// No code is needed to establish the constant-table base under absolute
// addressing (matches size() == 0 below).
536 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
537 // Empty encoding
538 }
539
// The base node emits nothing (see emit above), so its size is zero bytes.
540 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
541 return 0;
542 }
543
544 #ifndef PRODUCT
545 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
795 emit_opcode (*cbuf, opcode );
796 encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, false);
797 #ifndef PRODUCT
798 } else if( !do_size ) {
799 if( size != 0 ) st->print("\n\t");
800 if( opcode == 0x8B || opcode == 0x89 ) { // MOV
801 if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
802 else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
803 } else { // FLD, FST, PUSH, POP
804 st->print("%s [ESP + #%d]",op_str,offset);
805 }
806 #endif
807 }
808 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
809 return size+3+offset_size;
810 }
811
812 // Helper for XMM registers. Extra opcode bits, limited syntax.
// Spill/fill an XMM register to/from [ESP + offset].  Three modes share one
// body: emit code when cbuf != NULL, print assembly when only st is given,
// and in all cases return the cumulative encoding size in bytes.
// reg_lo+1 == reg_hi distinguishes a 64-bit double from a 32-bit float.
813 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
814 int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
815 if (cbuf) {
816 MacroAssembler _masm(cbuf);
817 if (reg_lo+1 == reg_hi) { // double move?
818 if (is_load) {
819 __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
820 } else {
821 __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
822 }
823 } else {
824 if (is_load) {
825 __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
826 } else {
827 __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
828 }
829 }
830 #ifndef PRODUCT
831 } else if (!do_size) {
832 if (size != 0) st->print("\n\t");
833 if (reg_lo+1 == reg_hi) { // double move?
834 if (is_load) st->print("%s %s,[ESP + #%d]",
835 UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
836 Matcher::regName[reg_lo], offset);
837 else st->print("MOVSD [ESP + #%d],%s",
838 offset, Matcher::regName[reg_lo]);
839 } else {
840 if (is_load) st->print("MOVSS %s,[ESP + #%d]",
841 Matcher::regName[reg_lo], offset);
842 else st->print("MOVSS [ESP + #%d],%s",
843 offset, Matcher::regName[reg_lo]);
844 }
845 #endif
846 }
// Displacement encodes in 0, 1 (disp8) or 4 (disp32) bytes.
847 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
848 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes.
849 return size+5+offset_size;
850 }
851
852
// XMM-to-XMM register move for spills: movdbl/movflt chosen by whether the
// register pair denotes a double (src_lo+1 == src_hi).  Like impl_x_helper,
// it either emits, prints, or just sizes, returning the cumulative size.
853 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
854 int src_hi, int dst_hi, int size, outputStream* st ) {
855 if (cbuf) {
856 MacroAssembler _masm(cbuf);
857 if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
858 __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
859 as_XMMRegister(Matcher::_regEncode[src_lo]));
860 } else {
861 __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
862 as_XMMRegister(Matcher::_regEncode[src_lo]));
863 }
864 #ifndef PRODUCT
865 } else if (!do_size) {
866 if (size != 0) st->print("\n\t");
867 if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
868 if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
869 st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
870 } else {
871 st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
872 }
873 } else {
874 if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
875 st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
876 } else {
877 st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
878 }
879 }
880 #endif
881 }
882 // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes.
883 // Only MOVAPS SSE prefix uses 1 byte.
884 int sz = 4;
885 if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
886 UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
887 return size + sz;
888 }
889
// Spill move from a general-purpose register into an XMM register (MOVD).
// Emits when cbuf is given, prints otherwise; always 4 bytes of encoding.
890 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
891 int src_hi, int dst_hi, int size, outputStream* st ) {
892 // 32-bit
893 if (cbuf) {
894 MacroAssembler _masm(cbuf);
895 __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
896 as_Register(Matcher::_regEncode[src_lo]));
897 #ifndef PRODUCT
898 } else if (!do_size) {
899 st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
900 #endif
901 }
902 return 4;
903 }
904
905
// Spill move from an XMM register into a general-purpose register (MOVD),
// the mirror of impl_movgpr2x_helper; fixed 4-byte encoding.
906 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
907 int src_hi, int dst_hi, int size, outputStream* st ) {
908 // 32-bit
909 if (cbuf) {
910 MacroAssembler _masm(cbuf);
911 __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
912 as_XMMRegister(Matcher::_regEncode[src_lo]));
913 #ifndef PRODUCT
914 } else if (!do_size) {
915 st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
916 #endif
917 }
918 return 4;
919 }
920
// GPR-to-GPR move (MOV r32,r/m32 = 0x8B): emit or print, then return the
// cumulative size (opcode + ModRM = 2 bytes).
921 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
922 if( cbuf ) {
923 emit_opcode(*cbuf, 0x8B );
924 emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
925 #ifndef PRODUCT
926 } else if( !do_size ) {
927 if( size != 0 ) st->print("\n\t");
928 st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
929 #endif
930 }
931 return size+2;
932 }
1929 // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine
1930 // who we intended to call.
1931 cbuf.set_insts_mark();
1932 $$$emit8$primary;
1933 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1934 virtual_call_Relocation::spec(virtual_call_oop_addr), RELOC_IMM32 );
1935 %}
1936
1937 enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL
// Indirect CALL through the method's compiled entry point at a small
// displacement off EAX; the assert guarantees the disp8 encoding fits.
1938 int disp = in_bytes(methodOopDesc::from_compiled_offset());
1939 assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
1940
1941 // CALL *[EAX+in_bytes(methodOopDesc::from_compiled_code_entry_point_offset())]
1942 cbuf.set_insts_mark();
1943 $$$emit8$primary;
1944 emit_rm(cbuf, 0x01, $secondary, EAX_enc ); // R/M byte
1945 emit_d8(cbuf, disp); // Displacement
1946
1947 %}
1948
1949 // Following encoding is no longer used, but may be restored if calling
1950 // convention changes significantly.
1951 // Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1952 //
1953 // enc_class Java_Interpreter_Call (label labl) %{ // JAVA INTERPRETER CALL
1954 // // int ic_reg = Matcher::inline_cache_reg();
1955 // // int ic_encode = Matcher::_regEncode[ic_reg];
1956 // // int imo_reg = Matcher::interpreter_method_oop_reg();
1957 // // int imo_encode = Matcher::_regEncode[imo_reg];
1958 //
1959 // // // Interpreter expects method_oop in EBX, currently a callee-saved register,
1960 // // // so we load it immediately before the call
1961 // // emit_opcode(cbuf, 0x8B); // MOV imo_reg,ic_reg # method_oop
1962 // // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1963 //
1964 // // xor rbp,ebp
1965 // emit_opcode(cbuf, 0x33);
1966 // emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1967 //
1968 // // CALL to interpreter.
2006 emit_d32(cbuf, src_con);
2007 }
2008 %}
2009
2010 enc_class LdImmL_Hi( eRegL dst, immL src) %{ // Load Immediate
2011 // Load immediate does not have a zero or sign extended version
2012 // for 8-bit immediates
// Loads the high 32 bits of the long constant into the pair's high half
// (register encoding = low + 2); a zero high word becomes XOR dst,dst.
2013 int dst_enc = $dst$$reg + 2;
2014 int src_con = ((julong)($src$$constant)) >> 32;
2015 if (src_con == 0) {
2016 // xor dst, dst
2017 emit_opcode(cbuf, 0x33);
2018 emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2019 } else {
2020 emit_opcode(cbuf, $primary + dst_enc);
2021 emit_d32(cbuf, src_con);
2022 }
2023 %}
2024
2025
2026 // Encode a reg-reg copy. If it is useless, then empty encoding.
2027 enc_class enc_Copy( eRegI dst, eRegI src ) %{
2028 encode_Copy( cbuf, $dst$$reg, $src$$reg );
2029 %}
2030
// Copy only the low half of a long pair into an int register.
2031 enc_class enc_CopyL_Lo( eRegI dst, eRegL src ) %{
2032 encode_Copy( cbuf, $dst$$reg, $src$$reg );
2033 %}
2034
// ModRM byte only (mod=11, reg-reg); the opcode is emitted separately.
2035 enc_class RegReg (eRegI dst, eRegI src) %{ // RegReg(Many)
2036 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2037 %}
2038
// Primary opcode + reg-reg ModRM operating on the low halves of two longs.
2039 enc_class RegReg_Lo(eRegL dst, eRegL src) %{ // RegReg(Many)
2040 $$$emit8$primary;
2041 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2042 %}
2043
// Secondary opcode + reg-reg ModRM operating on the high halves of two longs.
2044 enc_class RegReg_Hi(eRegL dst, eRegL src) %{ // RegReg(Many)
2045 $$$emit8$secondary;
2046 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2047 %}
2048
// Low-half reg-reg ModRM with no opcode byte (opcode emitted elsewhere).
2049 enc_class RegReg_Lo2(eRegL dst, eRegL src) %{ // RegReg(Many)
2050 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2051 %}
2052
2053 enc_class RegReg_Hi2(eRegL dst, eRegL src) %{ // RegReg(Many)
2054 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2564
2565
// Push $dst onto the x87 stack (FLD ST(i)) and, when $src is not already
// FPR1, rotate the stack (fincstp / FXCH / fdecstp) so $src is addressable
// as FPR1 for the modifying operation that follows.
2566 enc_class Push_Reg_Mod_D( regD dst, regD src) %{
2567 // load dst in FPR0
2568 emit_opcode( cbuf, 0xD9 );
2569 emit_d8( cbuf, 0xC0-1+$dst$$reg );
2570 if ($src$$reg != FPR1L_enc) {
2571 // fincstp
2572 emit_opcode (cbuf, 0xD9);
2573 emit_opcode (cbuf, 0xF7);
2574 // swap src with FPR1:
2575 // FXCH FPR1 with src
2576 emit_opcode(cbuf, 0xD9);
2577 emit_d8(cbuf, 0xC8-1+$src$$reg );
2578 // fdecstp
2579 emit_opcode (cbuf, 0xD9);
2580 emit_opcode (cbuf, 0xF6);
2581 }
2582 %}
2583
// Transfer two XMM doubles onto the x87 stack via a scratch stack slot:
// after this, src0 is at ST(0) and src1 at ST(1).  The 8 bytes reserved
// here are released later (see Push_ResultXD / pop_stack_temp_qword).
2584 enc_class Push_ModD_encoding(regXD src0, regXD src1) %{
2585 MacroAssembler _masm(&cbuf);
2586 __ subptr(rsp, 8);
2587 __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
2588 __ fld_d(Address(rsp, 0));
2589 __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
2590 __ fld_d(Address(rsp, 0));
2591 %}
2592
// Single-precision analogue of Push_ModD_encoding: route two XMM floats
// through a 4-byte stack slot onto the x87 stack (src0 ends at ST(0)).
2593 enc_class Push_ModX_encoding(regX src0, regX src1) %{
2594 MacroAssembler _masm(&cbuf);
2595 __ subptr(rsp, 4);
2596 __ movflt(Address(rsp, 0), $src1$$XMMRegister);
2597 __ fld_s(Address(rsp, 0));
2598 __ movflt(Address(rsp, 0), $src0$$XMMRegister);
2599 __ fld_s(Address(rsp, 0));
2600 %}
2601
// Move the x87 result (ST(0)) back into an XMM double through the scratch
// stack slot, then release the 8 bytes reserved earlier.
2602 enc_class Push_ResultXD(regXD dst) %{
2603 MacroAssembler _masm(&cbuf);
2604 __ fstp_d(Address(rsp, 0));
2605 __ movdbl($dst$$XMMRegister, Address(rsp, 0));
2606 __ addptr(rsp, 8);
2607 %}
2608
// Single-precision result transfer from ST(0) to an XMM register; the
// stack adjustment is parameterized since callers reserve 4 or 8 bytes.
2609 enc_class Push_ResultX(regX dst, immI d8) %{
2610 MacroAssembler _masm(&cbuf);
2611 __ fstp_s(Address(rsp, 0));
2612 __ movflt($dst$$XMMRegister, Address(rsp, 0));
2613 __ addptr(rsp, $d8$$constant);
2614 %}
2615
// Push one XMM double onto the x87 stack via a freshly reserved 8-byte slot
// (slot stays allocated; released by a matching pop/result encoding).
2616 enc_class Push_SrcXD(regXD src) %{
2617 MacroAssembler _masm(&cbuf);
2618 __ subptr(rsp, 8);
2619 __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2620 __ fld_d(Address(rsp, 0));
2621 %}
2622
// Reserve an 8-byte scratch slot on the stack for x87/XMM transfers.
2623 enc_class push_stack_temp_qword() %{
2624 MacroAssembler _masm(&cbuf);
2625 __ subptr(rsp, 8);
2626 %}
2627
// Release the 8-byte scratch slot reserved by push_stack_temp_qword.
2628 enc_class pop_stack_temp_qword() %{
2629 MacroAssembler _masm(&cbuf);
2630 __ addptr(rsp, 8);
2631 %}
2632
// Copy an XMM double onto the x87 stack using the already-reserved scratch
// slot at [rsp+0] (caller must have pushed a temp qword first).
2633 enc_class push_xmm_to_fpr1(regXD src) %{
2634 MacroAssembler _masm(&cbuf);
2635 __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2636 __ fld_d(Address(rsp, 0));
2637 %}
2638
2639 // Compute X^Y using Intel's fast hardware instructions, if possible.
2640 // Otherwise return a NaN.
2641 enc_class pow_exp_core_encoding %{
2642 // FPR1 holds Y*ln2(X). Compute FPR1 = 2^(Y*ln2(X))
2643 emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xC0); // fdup = fld st(0) Q Q
2644 emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xFC); // frndint int(Q) Q
2645 emit_opcode(cbuf,0xDC); emit_opcode(cbuf,0xE9); // fsub st(1) -= st(0); int(Q) frac(Q)
2646 emit_opcode(cbuf,0xDB); // FISTP [ESP] frac(Q)
2647 emit_opcode(cbuf,0x1C);
2648 emit_d8(cbuf,0x24);
2649 emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xF0); // f2xm1 2^frac(Q)-1
2650 emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xE8); // fld1 1 2^frac(Q)-1
2651 emit_opcode(cbuf,0xDE); emit_opcode(cbuf,0xC1); // faddp 2^frac(Q)
2652 emit_opcode(cbuf,0x8B); // mov rax,[esp+0]=int(Q)
2653 encode_RegMem(cbuf, EAX_enc, ESP_enc, 0x4, 0, 0, false);
2654 emit_opcode(cbuf,0xC7); // mov rcx,0xFFFFF800 - overflow mask
2655 emit_rm(cbuf, 0x3, 0x0, ECX_enc);
2656 emit_d32(cbuf,0xFFFFF800);
2795 emit_opcode( cbuf, 0x7A );
2796 emit_d8 ( cbuf, 0x13 );
2797 // movl(dst, less_result);
2798 emit_opcode( cbuf, 0xB8 + $dst$$reg);
2799 emit_d32( cbuf, -1 );
2800 // jcc(Assembler::below, exit);
2801 emit_opcode( cbuf, 0x72 );
2802 emit_d8 ( cbuf, 0x0C );
2803 // movl(dst, equal_result);
2804 emit_opcode( cbuf, 0xB8 + $dst$$reg);
2805 emit_d32( cbuf, 0 );
2806 // jcc(Assembler::equal, exit);
2807 emit_opcode( cbuf, 0x74 );
2808 emit_d8 ( cbuf, 0x05 );
2809 // movl(dst, greater_result);
2810 emit_opcode( cbuf, 0xB8 + $dst$$reg);
2811 emit_d32( cbuf, 1 );
2812 %}
2813
2814
2815 // Compare the longs and set flags
2816 // BROKEN! Do Not use as-is
// (the low-word CMP only executes when the high words are equal, and its
// unsigned flags do not compose with the signed high-word compare —
// presumably why the original authors flagged this encoding as broken)
2817 enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
2818 // CMP $src1.hi,$src2.hi
2819 emit_opcode( cbuf, 0x3B );
2820 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2821 // JNE,s done
2822 emit_opcode(cbuf,0x75);
2823 emit_d8(cbuf, 2 );
2824 // CMP $src1.lo,$src2.lo
2825 emit_opcode( cbuf, 0x3B );
2826 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2827 // done:
2828 %}
2829
2830 enc_class convert_int_long( regL dst, eRegI src ) %{
2831 // mov $dst.lo,$src
2832 int dst_encoding = $dst$$reg;
2833 int src_encoding = $src$$reg;
2834 encode_Copy( cbuf, dst_encoding , src_encoding );
3017 emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
3018 // CMP $tmp,$src.lo
3019 emit_opcode( cbuf, 0x3B );
3020 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
3021 // SBB $tmp,$src.hi
3022 emit_opcode( cbuf, 0x1B );
3023 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
3024 %}
3025
3026 // Sniff, sniff... smells like Gnu Superoptimizer
// 64-bit negate in a register pair: NEG both halves independently, then
// SBB hi,0 subtracts the borrow generated by a non-zero low word.
3027 enc_class neg_long( eRegL dst ) %{
3028 emit_opcode(cbuf,0xF7); // NEG hi
3029 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
3030 emit_opcode(cbuf,0xF7); // NEG lo
3031 emit_rm (cbuf,0x3, 0x3, $dst$$reg );
3032 emit_opcode(cbuf,0x83); // SBB hi,0
3033 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
3034 emit_d8 (cbuf,0 );
3035 %}
3036
3037
3038 // Because the transitions from emitted code to the runtime
3039 // monitorenter/exit helper stubs are so slow it's critical that
3040 // we inline both the stack-locking fast-path and the inflated fast path.
3041 //
3042 // See also: cmpFastLock and cmpFastUnlock.
3043 //
3044 // What follows is a specialized inline transliteration of the code
3045 // in slow_enter() and slow_exit(). If we're concerned about I$ bloat
3046 // another option would be to emit TrySlowEnter and TrySlowExit methods
3047 // at startup-time. These methods would accept arguments as
3048 // (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
3049 // indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply
3050 // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
3051 // In practice, however, the # of lock sites is bounded and is usually small.
3052 // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
3053 // if the processor uses simple bimodal branch predictors keyed by EIP
3054 // Since the helper routines would be called from multiple synchronization
3055 // sites.
3056 //
3057 // An even better approach would be write "MonitorEnter()" and "MonitorExit()"
3655 emit_opcode(cbuf,0x5A); // POP EDX
3656 emit_opcode(cbuf,0x81); // CMP EDX,imm
3657 emit_d8 (cbuf,0xFA); // rdx
3658 emit_d32 (cbuf,0x80000000); // 0x80000000
3659 emit_opcode(cbuf,0x75); // JNE around_slow_call
3660 emit_d8 (cbuf,0x07+4); // Size of slow_call
3661 emit_opcode(cbuf,0x85); // TEST EAX,EAX
3662 emit_opcode(cbuf,0xC0); // 2/rax,/rax,
3663 emit_opcode(cbuf,0x75); // JNE around_slow_call
3664 emit_d8 (cbuf,0x07); // Size of slow_call
3665 // Push src onto stack slow-path
3666 emit_opcode(cbuf,0xD9 ); // FLD ST(i)
3667 emit_d8 (cbuf,0xC0-1+$src$$reg );
3668 // CALL directly to the runtime
3669 cbuf.set_insts_mark();
3670 emit_opcode(cbuf,0xE8); // Call into runtime
3671 emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3672 // Carry on here...
3673 %}
3674
// Multiply the x87 stack top by register ST(i): FMUL ST,ST(i) = D8 C8+i.
3675 enc_class FMul_ST_reg( eRegF src1 ) %{
3676 // Operand was loaded from memory into fp ST (stack top)
3677 // FMUL ST,$src /* D8 C8+i */
3678 emit_opcode(cbuf, 0xD8);
3679 emit_opcode(cbuf, 0xC8 + $src1$$reg);
3680 %}
3681
// Add register ST(i) into the x87 stack top: FADD ST,ST(i) = D8 C0+i
// (non-popping, despite the FADDP in the original comment's mnemonic).
3682 enc_class FAdd_ST_reg( eRegF src2 ) %{
3683 // FADDP ST,src2 /* D8 C0+i */
3684 emit_opcode(cbuf, 0xD8);
3685 emit_opcode(cbuf, 0xC0 + $src2$$reg);
3686 //could use FADDP src2,fpST /* DE C0+i */
3687 %}
3688
// Popping add: FADDP ST(i),ST = DE C0+i — adds ST into ST(i), then pops.
3689 enc_class FAddP_reg_ST( eRegF src2 ) %{
3690 // FADDP src2,ST /* DE C0+i */
3691 emit_opcode(cbuf, 0xDE);
3692 emit_opcode(cbuf, 0xC0 + $src2$$reg);
3693 %}
3694
3722 emit_opcode(cbuf, 0xC0 + $src1$$reg);
3723
3724 // FMULP src2,ST /* DE C8+i */
3725 emit_opcode(cbuf, 0xDE);
3726 emit_opcode(cbuf, 0xC8 + $src2$$reg);
3727 %}
3728
3729 // Atomically load the volatile long
// FILD (DF /5) reads the 64 bits from memory in one access; FISTP (DF /7,
// via store_to_stackslot) deposits them into the long stack slot.
3730 enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
3731 emit_opcode(cbuf,0xDF);
3732 int rm_byte_opcode = 0x05;
3733 int base = $mem$$base;
3734 int index = $mem$$index;
3735 int scale = $mem$$scale;
3736 int displace = $mem$$disp;
3737 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
3738 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
3739 store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
3740 %}
3741
3742 // Volatile Store Long. Must be atomic, so move it into
3743 // the FP TOS and then do a 64-bit FIST. Has to probe the
3744 // target address before the store (for null-ptr checks)
3745 // so the memory operand is used twice in the encoding.
// FILD (DF /5) from the stack slot, then FISTP (DF /7) writes all 64 bits
// to the target in one access; the insts_mark covers the faulting FISTP.
3746 enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
3747 store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
3748 cbuf.set_insts_mark(); // Mark start of FIST in case $mem has an oop
3749 emit_opcode(cbuf,0xDF);
3750 int rm_byte_opcode = 0x07;
3751 int base = $mem$$base;
3752 int index = $mem$$index;
3753 int scale = $mem$$scale;
3754 int displace = $mem$$disp;
3755 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
3756 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
3757 %}
3758
3759 // Safepoint Poll. This polls the safepoint page, and causes an
3760 // exception if it is not readable. Unfortunately, it kills the condition code
3761 // in the process
3762 // We currently use TESTL [spp],EDI
3763 // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0
3764
// TEST r/m32,EDI against the absolute polling-page address; the relocation
// marks it as a poll site for the safepoint machinery.
3765 enc_class Safepoint_Poll() %{
3766 cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
3767 emit_opcode(cbuf,0x85);
3768 emit_rm (cbuf, 0x0, 0x7, 0x5);
3769 emit_d32(cbuf, (intptr_t)os::get_polling_page());
3770 %}
3771 %}
3772
3773
3774 //----------FRAME--------------------------------------------------------------
3775 // Definition of frame structure and management information.
3776 //
3777 // S T A C K L A Y O U T Allocators stack-slot number
3778 // | (to get allocators register number
6303 // then store it down to the stack and reload on the int
6304 // side.
// x87 path: atomic 64-bit load via FILD/FISTP into a long stack slot.
6305 instruct loadL_volatile(stackSlotL dst, memory mem) %{
6306 predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
6307 match(Set dst (LoadL mem));
6308
6309 ins_cost(200);
6310 format %{ "FILD $mem\t# Atomic volatile long load\n\t"
6311 "FISTp $dst" %}
6312 ins_encode(enc_loadL_volatile(mem,dst));
6313 ins_pipe( fpu_reg_mem );
6314 %}
6315
// SSE2 path: MOVSD gives an atomic 64-bit read; the value is then parked
// in a long stack slot via the XMM temp.
6316 instruct loadLX_volatile(stackSlotL dst, memory mem, regXD tmp) %{
6317 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
6318 match(Set dst (LoadL mem));
6319 effect(TEMP tmp);
6320 ins_cost(180);
6321 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
6322 "MOVSD $dst,$tmp" %}
6323 ins_encode %{
6324 __ movdbl($tmp$$XMMRegister, $mem$$Address);
6325 __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
6326 %}
6327 ins_pipe( pipe_slow );
6328 %}
6329
// SSE2 path into a register pair: one atomic MOVSD, then split the 64 bits
// with MOVD (low dword) and PSRLQ+MOVD (high dword).
6330 instruct loadLX_reg_volatile(eRegL dst, memory mem, regXD tmp) %{
6331 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
6332 match(Set dst (LoadL mem));
6333 effect(TEMP tmp);
6334 ins_cost(160);
6335 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
6336 "MOVD $dst.lo,$tmp\n\t"
6337 "PSRLQ $tmp,32\n\t"
6338 "MOVD $dst.hi,$tmp" %}
6339 ins_encode %{
6340 __ movdbl($tmp$$XMMRegister, $mem$$Address);
6341 __ movdl($dst$$Register, $tmp$$XMMRegister);
6342 __ psrlq($tmp$$XMMRegister, 32);
6343 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
6344 %}
6345 ins_pipe( pipe_slow );
6346 %}
6347
6348 // Load Range
// Plain 32-bit MOV; array-range (length) loads share the int load encoding.
6349 instruct loadRange(eRegI dst, memory mem) %{
6350 match(Set dst (LoadRange mem));
6351
6352 ins_cost(125);
6353 format %{ "MOV $dst,$mem" %}
6354 opcode(0x8B);
6355 ins_encode( OpcP, RegMem(dst,mem));
6356 ins_pipe( ialu_reg_mem );
6357 %}
6358
6359
6360 // Load Pointer
6361 instruct loadP(eRegP dst, memory mem) %{
6362 match(Set dst (LoadP mem));
6363
6364 ins_cost(125);
6382 // Load Double
// x87 path (UseSSE<=1): FLD m64real (DD /0) then pop into the target
// stack register.
6383 instruct loadD(regD dst, memory mem) %{
6384 predicate(UseSSE<=1);
6385 match(Set dst (LoadD mem));
6386
6387 ins_cost(150);
6388 format %{ "FLD_D ST,$mem\n\t"
6389 "FSTP $dst" %}
6390 opcode(0xDD); /* DD /0 */
6391 ins_encode( OpcP, RMopc_Mem(0x00,mem),
6392 Pop_Reg_D(dst) );
6393 ins_pipe( fpu_reg_mem );
6394 %}
6395
6396 // Load Double to XMM
// MOVSD load (clears the upper XMM half) — used when the CPU handles that
// form well (UseXmmLoadAndClearUpper).
6397 instruct loadXD(regXD dst, memory mem) %{
6398 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
6399 match(Set dst (LoadD mem));
6400 ins_cost(145);
6401 format %{ "MOVSD $dst,$mem" %}
6402 ins_encode %{
6403 __ movdbl ($dst$$XMMRegister, $mem$$Address);
6404 %}
6405 ins_pipe( pipe_slow );
6406 %}
6407
// Partial-register variant used when !UseXmmLoadAndClearUpper: printed as
// MOVLPD (writes only the low qword); movdbl picks the form at assembly time.
6408 instruct loadXD_partial(regXD dst, memory mem) %{
6409 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
6410 match(Set dst (LoadD mem));
6411 ins_cost(145);
6412 format %{ "MOVLPD $dst,$mem" %}
6413 ins_encode %{
6414 __ movdbl ($dst$$XMMRegister, $mem$$Address);
6415 %}
6416 ins_pipe( pipe_slow );
6417 %}
6418
6419 // Load to XMM register (single-precision floating point)
6420 // MOVSS instruction
6421 instruct loadX(regX dst, memory mem) %{
6422 predicate(UseSSE>=1);
6423 match(Set dst (LoadF mem));
6424 ins_cost(145);
6425 format %{ "MOVSS $dst,$mem" %}
6426 ins_encode %{
6427 __ movflt ($dst$$XMMRegister, $mem$$Address);
6428 %}
6429 ins_pipe( pipe_slow );
6430 %}
6431
6432 // Load Float
// x87 path (UseSSE==0): FLD m32real (D9 /0) then pop into the target
// stack register.
6433 instruct loadF(regF dst, memory mem) %{
6434 predicate(UseSSE==0);
6435 match(Set dst (LoadF mem));
6436
6437 ins_cost(150);
6438 format %{ "FLD_S ST,$mem\n\t"
6439 "FSTP $dst" %}
6440 opcode(0xD9); /* D9 /0 */
6441 ins_encode( OpcP, RMopc_Mem(0x00,mem),
6442 Pop_Reg_F(dst) );
6443 ins_pipe( fpu_reg_mem );
6444 %}
6445
6446 // Load Aligned Packed Byte to XMM register
// The packed-vector loads below all use a 64-bit MOVQ into the low half of
// an XMM register; they differ only in the ideal-graph node they match.
6447 instruct loadA8B(regXD dst, memory mem) %{
6448 predicate(UseSSE>=1);
6449 match(Set dst (Load8B mem));
6450 ins_cost(125);
6451 format %{ "MOVQ $dst,$mem\t! packed8B" %}
6452 ins_encode %{
6453 __ movq($dst$$XMMRegister, $mem$$Address);
6454 %}
6455 ins_pipe( pipe_slow );
6456 %}
6457
6458 // Load Aligned Packed Short to XMM register
6459 instruct loadA4S(regXD dst, memory mem) %{
6460 predicate(UseSSE>=1);
6461 match(Set dst (Load4S mem));
6462 ins_cost(125);
6463 format %{ "MOVQ $dst,$mem\t! packed4S" %}
6464 ins_encode %{
6465 __ movq($dst$$XMMRegister, $mem$$Address);
6466 %}
6467 ins_pipe( pipe_slow );
6468 %}
6469
6470 // Load Aligned Packed Char to XMM register
6471 instruct loadA4C(regXD dst, memory mem) %{
6472 predicate(UseSSE>=1);
6473 match(Set dst (Load4C mem));
6474 ins_cost(125);
6475 format %{ "MOVQ $dst,$mem\t! packed4C" %}
6476 ins_encode %{
6477 __ movq($dst$$XMMRegister, $mem$$Address);
6478 %}
6479 ins_pipe( pipe_slow );
6480 %}
6481
6482 // Load Aligned Packed Integer to XMM register
6483 instruct load2IU(regXD dst, memory mem) %{
6484 predicate(UseSSE>=1);
6485 match(Set dst (Load2I mem));
6486 ins_cost(125);
6487 format %{ "MOVQ $dst,$mem\t! packed2I" %}
6488 ins_encode %{
6489 __ movq($dst$$XMMRegister, $mem$$Address);
6490 %}
6491 ins_pipe( pipe_slow );
6492 %}
6493
6494 // Load Aligned Packed Single to XMM
// Slightly higher cost (145 vs 125) than the integer packed loads.
6495 instruct loadA2F(regXD dst, memory mem) %{
6496 predicate(UseSSE>=1);
6497 match(Set dst (Load2F mem));
6498 ins_cost(145);
6499 format %{ "MOVQ $dst,$mem\t! packed2F" %}
6500 ins_encode %{
6501 __ movq($dst$$XMMRegister, $mem$$Address);
6502 %}
6503 ins_pipe( pipe_slow );
6504 %}
6505
6506 // Load Effective Address
// Materializes base+8-bit-offset addressing into a pointer register via LEA;
// matches the address expression itself (Set dst mem), not a memory load.
6507 instruct leaP8(eRegP dst, indOffset8 mem) %{
6508 match(Set dst mem);
6509
6510 ins_cost(110);
6511 format %{ "LEA $dst,$mem" %}
6512 opcode(0x8D);
6513 ins_encode( OpcP, RegMem(dst,mem));
6514 ins_pipe( ialu_reg_reg_fat );
6515 %}
6516
6517 instruct leaP32(eRegP dst, indOffset32 mem) %{
6518 match(Set dst mem);
6519
6520 ins_cost(110);
6521 format %{ "LEA $dst,$mem" %}
6522 opcode(0x8D);
6708 %}
6709 ins_pipe(fpu_reg_con);
6710 %}
6711
6712 // The instruction usage is guarded by predicate in operand immXD().
// Loads an arbitrary double constant from the per-method constant table.
6713 instruct loadConXD(regXD dst, immXD con) %{
6714 match(Set dst con);
6715 ins_cost(125);
6716 format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
6717 ins_encode %{
6718 __ movdbl($dst$$XMMRegister, $constantaddress($con));
6719 %}
6720 ins_pipe(pipe_slow);
6721 %}
6722
6723 // The instruction usage is guarded by predicate in operand immXD0().
// +0.0 is synthesized with XORPD dst,dst instead of a memory load (cheaper).
6724 instruct loadConXD0(regXD dst, immXD0 src) %{
6725 match(Set dst src);
6726 ins_cost(100);
6727 format %{ "XORPD $dst,$dst\t# double 0.0" %}
6728 ins_encode %{
6729 __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
6730 %}
6731 ins_pipe( pipe_slow );
6732 %}
6733
6734 // Load Stack Slot
// Reloads a spilled int from its stack slot with a plain MOV reg,mem.
6735 instruct loadSSI(eRegI dst, stackSlotI src) %{
6736 match(Set dst src);
6737 ins_cost(125);
6738
6739 format %{ "MOV $dst,$src" %}
6740 opcode(0x8B);
6741 ins_encode( OpcP, RegMem(dst,src));
6742 ins_pipe( ialu_reg_mem );
6743 %}
6744
6745 instruct loadSSL(eRegL dst, stackSlotL src) %{
6746 match(Set dst src);
6747
6748 ins_cost(200);
6749 format %{ "MOV $dst,$src.lo\n\t"
6750 "MOV $dst+4,$src.hi" %}
7012 predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
7013 match(Set mem (StoreL mem src));
7014 effect( KILL cr );
7015 ins_cost(400);
7016 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
7017 "FILD $src\n\t"
7018 "FISTp $mem\t # 64-bit atomic volatile long store" %}
7019 opcode(0x3B);
7020 ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
7021 ins_pipe( fpu_reg_mem );
7022 %}
7023
// Atomic 64-bit volatile long store via SSE2: a single MOVSD gives the
// required single-copy atomicity on 32-bit x86. The leading CMP against the
// store address exists only to trigger an implicit null check; it clobbers
// EFLAGS, hence KILL cr. Source long is already in a stack slot here.
7024 instruct storeLX_volatile(memory mem, stackSlotL src, regXD tmp, eFlagsReg cr) %{
7025 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
7026 match(Set mem (StoreL mem src));
7027 effect( TEMP tmp, KILL cr );
7028 ins_cost(380);
7029 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
7030 "MOVSD $tmp,$src\n\t"
7031 "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
7032 ins_encode %{
7033 __ cmpl(rax, $mem$$Address);
7034 __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
7035 __ movdbl($mem$$Address, $tmp$$XMMRegister);
7036 %}
7037 ins_pipe( pipe_slow );
7038 %}
7039
// Same atomic store, but the long lives in a GPR pair: the two 32-bit halves
// are moved into XMM temps (HIGH_FROM_LOW yields the high half's register),
// merged with PUNPCKLDQ, then stored with one 64-bit MOVSD.
7040 instruct storeLX_reg_volatile(memory mem, eRegL src, regXD tmp2, regXD tmp, eFlagsReg cr) %{
7041 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
7042 match(Set mem (StoreL mem src));
7043 effect( TEMP tmp2 , TEMP tmp, KILL cr );
7044 ins_cost(360);
7045 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
7046 "MOVD $tmp,$src.lo\n\t"
7047 "MOVD $tmp2,$src.hi\n\t"
7048 "PUNPCKLDQ $tmp,$tmp2\n\t"
7049 "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
7050 ins_encode %{
7051 __ cmpl(rax, $mem$$Address);
7052 __ movdl($tmp$$XMMRegister, $src$$Register);
7053 __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
7054 __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
7055 __ movdbl($mem$$Address, $tmp$$XMMRegister);
7056 %}
7057 ins_pipe( pipe_slow );
7058 %}
7059
7060 // Store Pointer; for storing unknown oops and raw pointers
7061 instruct storeP(memory mem, anyRegP src) %{
7062 match(Set mem (StoreP mem src));
7063
7064 ins_cost(125);
7065 format %{ "MOV $mem,$src" %}
7066 opcode(0x89);
7067 ins_encode( OpcP, RegMem( src, mem ) );
7068 ins_pipe( ialu_mem_reg );
7069 %}
7070
7071 // Store Integer Immediate
7072 instruct storeImmI(memory mem, immI src) %{
7073 match(Set mem (StoreI mem src));
7074
7075 ins_cost(150);
7076 format %{ "MOV $mem,$src" %}
7103 ins_pipe( ialu_mem_imm );
7104 %}
7105
7106 // Store Byte Immediate
// C6 /0 MOV m8,imm8 -- stores an 8-bit immediate directly to memory.
7107 instruct storeImmB(memory mem, immI8 src) %{
7108 match(Set mem (StoreB mem src));
7109
7110 ins_cost(150);
7111 format %{ "MOV8 $mem,$src" %}
7112 opcode(0xC6); /* C6 /0 */
7113 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
7114 ins_pipe( ialu_mem_imm );
7115 %}
7116
7117 // Store Aligned Packed Byte XMM register to memory
// The packed-vector stores below mirror the packed loads: one 64-bit MOVQ
// from the low half of an XMM register, differing only in the matched node.
7118 instruct storeA8B(memory mem, regXD src) %{
7119 predicate(UseSSE>=1);
7120 match(Set mem (Store8B mem src));
7121 ins_cost(145);
7122 format %{ "MOVQ $mem,$src\t! packed8B" %}
7123 ins_encode %{
7124 __ movq($mem$$Address, $src$$XMMRegister);
7125 %}
7126 ins_pipe( pipe_slow );
7127 %}
7128
7129 // Store Aligned Packed Char/Short XMM register to memory
7130 instruct storeA4C(memory mem, regXD src) %{
7131 predicate(UseSSE>=1);
7132 match(Set mem (Store4C mem src));
7133 ins_cost(145);
7134 format %{ "MOVQ $mem,$src\t! packed4C" %}
7135 ins_encode %{
7136 __ movq($mem$$Address, $src$$XMMRegister);
7137 %}
7138 ins_pipe( pipe_slow );
7139 %}
7140
7141 // Store Aligned Packed Integer XMM register to memory
7142 instruct storeA2I(memory mem, regXD src) %{
7143 predicate(UseSSE>=1);
7144 match(Set mem (Store2I mem src));
7145 ins_cost(145);
7146 format %{ "MOVQ $mem,$src\t! packed2I" %}
7147 ins_encode %{
7148 __ movq($mem$$Address, $src$$XMMRegister);
7149 %}
7150 ins_pipe( pipe_slow );
7151 %}
7152
7153 // Store CMS card-mark Immediate
// Same C6 /0 byte-store encoding as storeImmB, but matches the StoreCM
// (card-mark) node so the GC barrier store is kept distinct in the graph.
7154 instruct storeImmCM(memory mem, immI8 src) %{
7155 match(Set mem (StoreCM mem src));
7156
7157 ins_cost(150);
7158 format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
7159 opcode(0xC6); /* C6 /0 */
7160 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
7161 ins_pipe( ialu_mem_imm );
7162 %}
7163
7164 // Store Double
7165 instruct storeD( memory mem, regDPR1 src) %{
7166 predicate(UseSSE<=1);
7167 match(Set mem (StoreD mem src));
7168
7169 ins_cost(100);
7175
7176 // Store double does rounding on x86
// x87 path: matching StoreD(RoundDouble ...) lets the FST_D memory store
// itself perform the rounding, so no separate round node is emitted.
7177 instruct storeD_rounded( memory mem, regDPR1 src) %{
7178 predicate(UseSSE<=1);
7179 match(Set mem (StoreD mem (RoundDouble src)));
7180
7181 ins_cost(100);
7182 format %{ "FST_D $mem,$src\t# round" %}
7183 opcode(0xDD); /* DD /2 */
7184 ins_encode( enc_FP_store(mem,src) );
7185 ins_pipe( fpu_mem_reg );
7186 %}
7187
7188 // Store XMM register to memory (double-precision floating points)
7189 // MOVSD instruction
7190 instruct storeXD(memory mem, regXD src) %{
7191 predicate(UseSSE>=2);
7192 match(Set mem (StoreD mem src));
7193 ins_cost(95);
7194 format %{ "MOVSD $mem,$src" %}
7195 ins_encode %{
7196 __ movdbl($mem$$Address, $src$$XMMRegister);
7197 %}
7198 ins_pipe( pipe_slow );
7199 %}
7200
7201 // Store XMM register to memory (single-precision floating point)
7202 // MOVSS instruction
7203 instruct storeX(memory mem, regX src) %{
7204 predicate(UseSSE>=1);
7205 match(Set mem (StoreF mem src));
7206 ins_cost(95);
7207 format %{ "MOVSS $mem,$src" %}
7208 ins_encode %{
7209 __ movflt($mem$$Address, $src$$XMMRegister);
7210 %}
7211 ins_pipe( pipe_slow );
7212 %}
7213
7214 // Store Aligned Packed Single Float XMM register to memory
7215 instruct storeA2F(memory mem, regXD src) %{
7216 predicate(UseSSE>=1);
7217 match(Set mem (Store2F mem src));
7218 ins_cost(145);
7219 format %{ "MOVQ $mem,$src\t! packed2F" %}
7220 ins_encode %{
7221 __ movq($mem$$Address, $src$$XMMRegister);
7222 %}
7223 ins_pipe( pipe_slow );
7224 %}
7225
7226 // Store Float
// x87 float store, only when SSE is fully disabled; src must be on FPU TOS
// (regFPR1), FST_S (D9 /2) stores and rounds to single precision.
7227 instruct storeF( memory mem, regFPR1 src) %{
7228 predicate(UseSSE==0);
7229 match(Set mem (StoreF mem src));
7230
7231 ins_cost(100);
7232 format %{ "FST_S $mem,$src" %}
7233 opcode(0xD9); /* D9 /2 */
7234 ins_encode( enc_FP_store(mem,src) );
7235 ins_pipe( fpu_mem_reg );
7236 %}
7237
7238 // Store Float does rounding on x86
7239 instruct storeF_rounded( memory mem, regFPR1 src) %{
7240 predicate(UseSSE==0);
7241 match(Set mem (StoreF mem (RoundFloat src)));
7242
7912 match(Set dst (CastII dst));
7913 format %{ "#castII of $dst" %}
7914 ins_encode( /*empty encoding*/ );
7915 ins_cost(0);
7916 ins_pipe( empty );
7917 %}
7918
7919
7920 // Load-locked - same as a regular pointer load when used with compare-swap
7921 instruct loadPLocked(eRegP dst, memory mem) %{
7922 match(Set dst (LoadPLocked mem));
7923
7924 ins_cost(125);
7925 format %{ "MOV $dst,$mem\t# Load ptr. locked" %}
7926 opcode(0x8B);
7927 ins_encode( OpcP, RegMem(dst,mem));
7928 ins_pipe( ialu_reg_mem );
7929 %}
7930
7931 // LoadLong-locked - same as a volatile long load when used with compare-swap
// Pre-SSE2 path: FILD/FISTP gives an atomic 64-bit load into a stack slot.
7932 instruct loadLLocked(stackSlotL dst, memory mem) %{
7933 predicate(UseSSE<=1);
7934 match(Set dst (LoadLLocked mem));
7935
7936 ins_cost(200);
7937 format %{ "FILD $mem\t# Atomic volatile long load\n\t"
7938 "FISTp $dst" %}
7939 ins_encode(enc_loadL_volatile(mem,dst));
7940 ins_pipe( fpu_reg_mem );
7941 %}
7942
// SSE2 path: one 64-bit MOVSD load (atomic), bounced through an XMM temp
// into the destination stack slot.
7943 instruct loadLX_Locked(stackSlotL dst, memory mem, regXD tmp) %{
7944 predicate(UseSSE>=2);
7945 match(Set dst (LoadLLocked mem));
7946 effect(TEMP tmp);
7947 ins_cost(180);
7948 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
7949 "MOVSD $dst,$tmp" %}
7950 ins_encode %{
7951 __ movdbl($tmp$$XMMRegister, $mem$$Address);
7952 __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
7953 %}
7954 ins_pipe( pipe_slow );
7955 %}
7956
// SSE2 path into a GPR pair: load atomically into XMM, MOVD out the low
// word, shift right 32 with PSRLQ, MOVD out the high word (HIGH_FROM_LOW
// names the high half of the long register pair).
7957 instruct loadLX_reg_Locked(eRegL dst, memory mem, regXD tmp) %{
7958 predicate(UseSSE>=2);
7959 match(Set dst (LoadLLocked mem));
7960 effect(TEMP tmp);
7961 ins_cost(160);
7962 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
7963 "MOVD $dst.lo,$tmp\n\t"
7964 "PSRLQ $tmp,32\n\t"
7965 "MOVD $dst.hi,$tmp" %}
7966 ins_encode %{
7967 __ movdbl($tmp$$XMMRegister, $mem$$Address);
7968 __ movdl($dst$$Register, $tmp$$XMMRegister);
7969 __ psrlq($tmp$$XMMRegister, 32);
7970 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
7971 %}
7972 ins_pipe( pipe_slow );
7973 %}
7974
7975 // Conditional-store of the updated heap-top.
7976 // Used during allocation of the shared heap.
7977 // Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel.
// 0F B1 is CMPXCHG r/m32,r32; lock_prefix makes it atomic. oldval is pinned
// to EAX because CMPXCHG implicitly compares/loads through EAX.
7978 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
7979 match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
7980 // EAX is killed if there is contention, but then it's also unused.
7981 // In the common case of no contention, EAX holds the new oop address.
7982 format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
7983 ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
7984 ins_pipe( pipe_cmpxchg );
7985 %}
7986
7987 // Conditional-store of an int value.
7988 // ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel.
7989 instruct storeIConditional( memory mem, eAXRegI oldval, eRegI newval, eFlagsReg cr ) %{
7990 match(Set cr (StoreIConditional mem (Binary oldval newval)));
7991 effect(KILL oldval);
9613 OpcS, OpcP, PopFPU,
9614 CmpF_Result(dst));
9615 ins_pipe( pipe_slow );
9616 %}
9617
9618 // Compare into -1,0,1
// x87 three-way double compare: pushes src1, compares against src2
// (D8 /3 = FCOMP family), then CmpF_Result materializes -1/0/1 in $dst.
// Clobbers EAX (FNSTSW-style flag extraction) and EFLAGS.
9619 instruct cmpD_reg(eRegI dst, regD src1, regD src2, eAXRegI rax, eFlagsReg cr) %{
9620 predicate(UseSSE<=1);
9621 match(Set dst (CmpD3 src1 src2));
9622 effect(KILL cr, KILL rax);
9623 ins_cost(300);
9624 format %{ "FCMPD $dst,$src1,$src2" %}
9625 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9626 ins_encode( Push_Reg_D(src1),
9627 OpcP, RegOpc(src2),
9628 CmpF_Result(dst));
9629 ins_pipe( pipe_slow );
9630 %}
9631
9632 // float compare and set condition codes in EFLAGS by XMM regs
// UCOMISD sets PF on unordered (NaN); emit_cmpfp_fixup rewrites the flags
// so NaN compares as "less than" (sets CF), matching Java semantics.
9633 instruct cmpXD_cc(eFlagsRegU cr, regXD src1, regXD src2) %{
9634 predicate(UseSSE>=2);
9635 match(Set cr (CmpD src1 src2));
9636 ins_cost(145);
9637 format %{ "UCOMISD $src1,$src2\n\t"
9638 "JNP,s exit\n\t"
9639 "PUSHF\t# saw NaN, set CF\n\t"
9640 "AND [rsp], #0xffffff2b\n\t"
9641 "POPF\n"
9642 "exit:" %}
9643 ins_encode %{
9644 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9645 emit_cmpfp_fixup(_masm);
9646 %}
9647 ins_pipe( pipe_slow );
9648 %}
9649
// Cheaper variant for consumers that only read CF/ZF (eFlagsRegUCF):
// no NaN fixup sequence is needed.
9650 instruct cmpXD_ccCF(eFlagsRegUCF cr, regXD src1, regXD src2) %{
9651 predicate(UseSSE>=2);
9652 match(Set cr (CmpD src1 src2));
9653 ins_cost(100);
9654 format %{ "UCOMISD $src1,$src2" %}
9655 ins_encode %{
9656 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9657 %}
9658 ins_pipe( pipe_slow );
9659 %}
9660
9661 // float compare and set condition codes in EFLAGS by XMM regs
// Memory-operand form of cmpXD_cc: folds the LoadD into the compare.
9662 instruct cmpXD_ccmem(eFlagsRegU cr, regXD src1, memory src2) %{
9663 predicate(UseSSE>=2);
9664 match(Set cr (CmpD src1 (LoadD src2)));
9665 ins_cost(145);
9666 format %{ "UCOMISD $src1,$src2\n\t"
9667 "JNP,s exit\n\t"
9668 "PUSHF\t# saw NaN, set CF\n\t"
9669 "AND [rsp], #0xffffff2b\n\t"
9670 "POPF\n"
9671 "exit:" %}
9672 ins_encode %{
9673 __ ucomisd($src1$$XMMRegister, $src2$$Address);
9674 emit_cmpfp_fixup(_masm);
9675 %}
9676 ins_pipe( pipe_slow );
9677 %}
9678
9679 instruct cmpXD_ccmemCF(eFlagsRegUCF cr, regXD src1, memory src2) %{
9680 predicate(UseSSE>=2);
9681 match(Set cr (CmpD src1 (LoadD src2)));
9682 ins_cost(100);
9683 format %{ "UCOMISD $src1,$src2" %}
9684 ins_encode %{
9685 __ ucomisd($src1$$XMMRegister, $src2$$Address);
9686 %}
9687 ins_pipe( pipe_slow );
9688 %}
9689
9690 // Compare into -1,0,1 in XMM
// Three-way compare result in a GPR; emit_cmpfp3 emits the MOV/-1, JP/JB,
// SETNE, MOVZB sequence shown in the format string. EFLAGS clobbered.
9691 instruct cmpXD_reg(xRegI dst, regXD src1, regXD src2, eFlagsReg cr) %{
9692 predicate(UseSSE>=2);
9693 match(Set dst (CmpD3 src1 src2));
9694 effect(KILL cr);
9695 ins_cost(255);
9696 format %{ "UCOMISD $src1, $src2\n\t"
9697 "MOV $dst, #-1\n\t"
9698 "JP,s done\n\t"
9699 "JB,s done\n\t"
9700 "SETNE $dst\n\t"
9701 "MOVZB $dst, $dst\n"
9702 "done:" %}
9703 ins_encode %{
9704 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9705 emit_cmpfp3(_masm, $dst$$Register);
9706 %}
9707 ins_pipe( pipe_slow );
9708 %}
9709
9710 // Compare into -1,0,1 in XMM and memory
9711 instruct cmpXD_regmem(xRegI dst, regXD src1, memory src2, eFlagsReg cr) %{
9712 predicate(UseSSE>=2);
9713 match(Set dst (CmpD3 src1 (LoadD src2)));
9714 effect(KILL cr);
9715 ins_cost(275);
9716 format %{ "UCOMISD $src1, $src2\n\t"
9717 "MOV $dst, #-1\n\t"
9718 "JP,s done\n\t"
9719 "JB,s done\n\t"
9720 "SETNE $dst\n\t"
9721 "MOVZB $dst, $dst\n"
9722 "done:" %}
9723 ins_encode %{
9724 __ ucomisd($src1$$XMMRegister, $src2$$Address);
9725 emit_cmpfp3(_masm, $dst$$Register);
9726 %}
9727 ins_pipe( pipe_slow );
9728 %}
9729
9730
// x87 double subtract: FLD src then FSUBP (DE /5) into dst, popping the stack.
9731 instruct subD_reg(regD dst, regD src) %{
9732 predicate (UseSSE <=1);
9733 match(Set dst (SubD dst src));
9734
9735 format %{ "FLD $src\n\t"
9736 "DSUBp $dst,ST" %}
9737 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
9738 ins_cost(150);
9739 ins_encode( Push_Reg_D(src),
9740 OpcP, RegOpc(dst) );
9741 ins_pipe( fpu_reg_reg );
9742 %}
9743
9744 instruct subD_reg_round(stackSlotD dst, regD src1, regD src2) %{
9745 predicate (UseSSE <=1);
9746 match(Set dst (RoundDouble (SubD src1 src2)));
9765 "DSUBp $dst,ST" %}
9766 opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD DD /0 */
9767 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9768 OpcP, RegOpc(dst) );
9769 ins_pipe( fpu_reg_mem );
9770 %}
9771
// x87 double abs: FABS (D9 E1) operates in place on the FPU top-of-stack,
// so both operands are pinned to regDPR1.
9772 instruct absD_reg(regDPR1 dst, regDPR1 src) %{
9773 predicate (UseSSE<=1);
9774 match(Set dst (AbsD src));
9775 ins_cost(100);
9776 format %{ "FABS" %}
9777 opcode(0xE1, 0xD9);
9778 ins_encode( OpcS, OpcP );
9779 ins_pipe( fpu_reg_reg );
9780 %}
9781
// SSE2 abs: ANDPD with the statically-initialized 0x7FFF... sign-mask pool
// clears the sign bit. Pool address is 16-byte aligned at VM startup.
9782 instruct absXD_reg( regXD dst ) %{
9783 predicate(UseSSE>=2);
9784 match(Set dst (AbsD dst));
9785 ins_cost(150);
9786 format %{ "ANDPD $dst,[0x7FFFFFFFFFFFFFFF]\t# ABS D by sign masking" %}
9787 ins_encode %{
9788 __ andpd($dst$$XMMRegister,
9789 ExternalAddress((address)double_signmask_pool));
9790 %}
9791 ins_pipe( pipe_slow );
9792 %}
9793
// x87 double negate: FCHS (D9 E0) flips the sign of the FPU top-of-stack.
9794 instruct negD_reg(regDPR1 dst, regDPR1 src) %{
9795 predicate(UseSSE<=1);
9796 match(Set dst (NegD src));
9797 ins_cost(100);
9798 format %{ "FCHS" %}
9799 opcode(0xE0, 0xD9);
9800 ins_encode( OpcS, OpcP );
9801 ins_pipe( fpu_reg_reg );
9802 %}
9803
// SSE2 negate: XORPD with the 0x8000... sign-flip pool toggles the sign bit.
9804 instruct negXD_reg( regXD dst ) %{
9805 predicate(UseSSE>=2);
9806 match(Set dst (NegD dst));
9807 ins_cost(150);
9808 format %{ "XORPD $dst,[0x8000000000000000]\t# CHS D by sign flipping" %}
9809 ins_encode %{
9810 __ xorpd($dst$$XMMRegister,
9811 ExternalAddress((address)double_signflip_pool));
9812 %}
9813 ins_pipe( pipe_slow );
9814 %}
9815
// x87 double add: FLD src, then FADDP (DE /0) into dst.
9816 instruct addD_reg(regD dst, regD src) %{
9817 predicate(UseSSE<=1);
9818 match(Set dst (AddD dst src));
9819 format %{ "FLD $src\n\t"
9820 "DADD $dst,ST" %}
9821 size(4);
9822 ins_cost(150);
9823 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
9824 ins_encode( Push_Reg_D(src),
9825 OpcP, RegOpc(dst) );
9826 ins_pipe( fpu_reg_reg );
9827 %}
// Add a double constant then round to a stack slot. The predicate excludes
// con == 0.0 and con == 1.0, which are handled by other (cheaper) rules;
// the constant is loaded from the per-method constant table.
9901 instruct addD_reg_imm_round(stackSlotD dst, regD src, immD con) %{
9902 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
9903 match(Set dst (RoundDouble (AddD src con)));
9904 ins_cost(200);
9905 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
9906 "DADD ST,$src\n\t"
9907 "FSTP_D $dst\t# D-round" %}
9908 ins_encode %{
9909 __ fld_d($constantaddress($con));
9910 __ fadd($src$$reg);
9911 __ fstp_d(Address(rsp, $dst$$disp));
9912 %}
9913 ins_pipe(fpu_mem_reg_con);
9914 %}
9915
9916 // Add two double precision floating point values in xmm
// The SSE2 scalar double arithmetic rules below come in three flavors each:
// reg-reg, reg-constant (loaded from the constant table), and reg-mem
// (folding a LoadD into the operation).
9917 instruct addXD_reg(regXD dst, regXD src) %{
9918 predicate(UseSSE>=2);
9919 match(Set dst (AddD dst src));
9920 format %{ "ADDSD $dst,$src" %}
9921 ins_encode %{
9922 __ addsd($dst$$XMMRegister, $src$$XMMRegister);
9923 %}
9924 ins_pipe( pipe_slow );
9925 %}
9926
9927 instruct addXD_imm(regXD dst, immXD con) %{
9928 predicate(UseSSE>=2);
9929 match(Set dst (AddD dst con));
9930 format %{ "ADDSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
9931 ins_encode %{
9932 __ addsd($dst$$XMMRegister, $constantaddress($con));
9933 %}
9934 ins_pipe(pipe_slow);
9935 %}
9936
9937 instruct addXD_mem(regXD dst, memory mem) %{
9938 predicate(UseSSE>=2);
9939 match(Set dst (AddD dst (LoadD mem)));
9940 format %{ "ADDSD $dst,$mem" %}
9941 ins_encode %{
9942 __ addsd($dst$$XMMRegister, $mem$$Address);
9943 %}
9944 ins_pipe( pipe_slow );
9945 %}
9946
9947 // Sub two double precision floating point values in xmm
9948 instruct subXD_reg(regXD dst, regXD src) %{
9949 predicate(UseSSE>=2);
9950 match(Set dst (SubD dst src));
9951 ins_cost(150);
9952 format %{ "SUBSD $dst,$src" %}
9953 ins_encode %{
9954 __ subsd($dst$$XMMRegister, $src$$XMMRegister);
9955 %}
9956 ins_pipe( pipe_slow );
9957 %}
9958
9959 instruct subXD_imm(regXD dst, immXD con) %{
9960 predicate(UseSSE>=2);
9961 match(Set dst (SubD dst con));
9962 ins_cost(150);
9963 format %{ "SUBSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
9964 ins_encode %{
9965 __ subsd($dst$$XMMRegister, $constantaddress($con));
9966 %}
9967 ins_pipe(pipe_slow);
9968 %}
9969
9970 instruct subXD_mem(regXD dst, memory mem) %{
9971 predicate(UseSSE>=2);
9972 match(Set dst (SubD dst (LoadD mem)));
9973 ins_cost(150);
9974 format %{ "SUBSD $dst,$mem" %}
9975 ins_encode %{
9976 __ subsd($dst$$XMMRegister, $mem$$Address);
9977 %}
9978 ins_pipe( pipe_slow );
9979 %}
9980
9981 // Mul two double precision floating point values in xmm
9982 instruct mulXD_reg(regXD dst, regXD src) %{
9983 predicate(UseSSE>=2);
9984 match(Set dst (MulD dst src));
9985 format %{ "MULSD $dst,$src" %}
9986 ins_encode %{
9987 __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
9988 %}
9989 ins_pipe( pipe_slow );
9990 %}
9991
9992 instruct mulXD_imm(regXD dst, immXD con) %{
9993 predicate(UseSSE>=2);
9994 match(Set dst (MulD dst con));
9995 format %{ "MULSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
9996 ins_encode %{
9997 __ mulsd($dst$$XMMRegister, $constantaddress($con));
9998 %}
9999 ins_pipe(pipe_slow);
10000 %}
10001
10002 instruct mulXD_mem(regXD dst, memory mem) %{
10003 predicate(UseSSE>=2);
10004 match(Set dst (MulD dst (LoadD mem)));
10005 format %{ "MULSD $dst,$mem" %}
10006 ins_encode %{
10007 __ mulsd($dst$$XMMRegister, $mem$$Address);
10008 %}
10009 ins_pipe( pipe_slow );
10010 %}
10011
10012 // Div two double precision floating point values in xmm
// Note: the opcode() bytes here are unused by the %{ %} encoding, which
// calls the macro-assembler directly; left for reference only.
10013 instruct divXD_reg(regXD dst, regXD src) %{
10014 predicate(UseSSE>=2);
10015 match(Set dst (DivD dst src));
10016 format %{ "DIVSD $dst,$src" %}
10017 opcode(0xF2, 0x0F, 0x5E);
10018 ins_encode %{
10019 __ divsd($dst$$XMMRegister, $src$$XMMRegister);
10020 %}
10021 ins_pipe( pipe_slow );
10022 %}
10023
10024 instruct divXD_imm(regXD dst, immXD con) %{
10025 predicate(UseSSE>=2);
10026 match(Set dst (DivD dst con));
10027 format %{ "DIVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
10028 ins_encode %{
10029 __ divsd($dst$$XMMRegister, $constantaddress($con));
10030 %}
10031 ins_pipe(pipe_slow);
10032 %}
10033
10034 instruct divXD_mem(regXD dst, memory mem) %{
10035 predicate(UseSSE>=2);
10036 match(Set dst (DivD dst (LoadD mem)));
10037 format %{ "DIVSD $dst,$mem" %}
10038 ins_encode %{
10039 __ divsd($dst$$XMMRegister, $mem$$Address);
10040 %}
10041 ins_pipe( pipe_slow );
10042 %}
10043
10044
// x87 double multiply: FLD src then FMULP (DE /1) into dst.
10045 instruct mulD_reg(regD dst, regD src) %{
10046 predicate(UseSSE<=1);
10047 match(Set dst (MulD dst src));
10048 format %{ "FLD $src\n\t"
10049 "DMULp $dst,ST" %}
10050 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
10051 ins_cost(150);
10052 ins_encode( Push_Reg_D(src),
10053 OpcP, RegOpc(dst) );
10054 ins_pipe( fpu_reg_reg );
10055 %}
10056
10057 // Strict FP instruction biases argument before multiply then
10058 // biases result to avoid double rounding of subnormals.
10059 //
10060 // scale arg1 by multiplying arg1 by 2^(-15360)
10652 OpcS, OpcP, PopFPU,
10653 CmpF_Result(dst));
10654 ins_pipe( pipe_slow );
10655 %}
10656
10657 // Compare into -1,0,1
// x87 three-way float compare, mirroring cmpD_reg: Push_Reg_D is the
// shared x87 push helper used by both float and double compare rules here.
// Clobbers EAX and EFLAGS for the flag-extraction sequence.
10658 instruct cmpF_reg(eRegI dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{
10659 predicate(UseSSE == 0);
10660 match(Set dst (CmpF3 src1 src2));
10661 effect(KILL cr, KILL rax);
10662 ins_cost(300);
10663 format %{ "FCMPF $dst,$src1,$src2" %}
10664 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10665 ins_encode( Push_Reg_D(src1),
10666 OpcP, RegOpc(src2),
10667 CmpF_Result(dst));
10668 ins_pipe( pipe_slow );
10669 %}
10670
10671 // float compare and set condition codes in EFLAGS by XMM regs
// Single-precision twins of the cmpXD_* rules: UCOMISS plus the same
// emit_cmpfp_fixup NaN handling (PF set on unordered -> force CF).
10672 instruct cmpX_cc(eFlagsRegU cr, regX src1, regX src2) %{
10673 predicate(UseSSE>=1);
10674 match(Set cr (CmpF src1 src2));
10675 ins_cost(145);
10676 format %{ "UCOMISS $src1,$src2\n\t"
10677 "JNP,s exit\n\t"
10678 "PUSHF\t# saw NaN, set CF\n\t"
10679 "AND [rsp], #0xffffff2b\n\t"
10680 "POPF\n"
10681 "exit:" %}
10682 ins_encode %{
10683 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10684 emit_cmpfp_fixup(_masm);
10685 %}
10686 ins_pipe( pipe_slow );
10687 %}
10688
// CF-only consumers (eFlagsRegUCF) skip the NaN fixup sequence.
10689 instruct cmpX_ccCF(eFlagsRegUCF cr, regX src1, regX src2) %{
10690 predicate(UseSSE>=1);
10691 match(Set cr (CmpF src1 src2));
10692 ins_cost(100);
10693 format %{ "UCOMISS $src1,$src2" %}
10694 ins_encode %{
10695 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10696 %}
10697 ins_pipe( pipe_slow );
10698 %}
10699
10700 // float compare and set condition codes in EFLAGS by XMM regs
// Memory-operand form: folds the LoadF into the compare.
10701 instruct cmpX_ccmem(eFlagsRegU cr, regX src1, memory src2) %{
10702 predicate(UseSSE>=1);
10703 match(Set cr (CmpF src1 (LoadF src2)));
10704 ins_cost(165);
10705 format %{ "UCOMISS $src1,$src2\n\t"
10706 "JNP,s exit\n\t"
10707 "PUSHF\t# saw NaN, set CF\n\t"
10708 "AND [rsp], #0xffffff2b\n\t"
10709 "POPF\n"
10710 "exit:" %}
10711 ins_encode %{
10712 __ ucomiss($src1$$XMMRegister, $src2$$Address);
10713 emit_cmpfp_fixup(_masm);
10714 %}
10715 ins_pipe( pipe_slow );
10716 %}
10717
10718 instruct cmpX_ccmemCF(eFlagsRegUCF cr, regX src1, memory src2) %{
10719 predicate(UseSSE>=1);
10720 match(Set cr (CmpF src1 (LoadF src2)));
10721 ins_cost(100);
10722 format %{ "UCOMISS $src1,$src2" %}
10723 ins_encode %{
10724 __ ucomiss($src1$$XMMRegister, $src2$$Address);
10725 %}
10726 ins_pipe( pipe_slow );
10727 %}
10728
10729 // Compare into -1,0,1 in XMM
// Three-way result in a GPR via emit_cmpfp3 (MOV/-1, JP/JB, SETNE, MOVZB).
10730 instruct cmpX_reg(xRegI dst, regX src1, regX src2, eFlagsReg cr) %{
10731 predicate(UseSSE>=1);
10732 match(Set dst (CmpF3 src1 src2));
10733 effect(KILL cr);
10734 ins_cost(255);
10735 format %{ "UCOMISS $src1, $src2\n\t"
10736 "MOV $dst, #-1\n\t"
10737 "JP,s done\n\t"
10738 "JB,s done\n\t"
10739 "SETNE $dst\n\t"
10740 "MOVZB $dst, $dst\n"
10741 "done:" %}
10742 ins_encode %{
10743 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10744 emit_cmpfp3(_masm, $dst$$Register);
10745 %}
10746 ins_pipe( pipe_slow );
10747 %}
10748
10749 // Compare into -1,0,1 in XMM and memory
10750 instruct cmpX_regmem(xRegI dst, regX src1, memory src2, eFlagsReg cr) %{
10751 predicate(UseSSE>=1);
10752 match(Set dst (CmpF3 src1 (LoadF src2)));
10753 effect(KILL cr);
10754 ins_cost(275);
10755 format %{ "UCOMISS $src1, $src2\n\t"
10756 "MOV $dst, #-1\n\t"
10757 "JP,s done\n\t"
10758 "JB,s done\n\t"
10759 "SETNE $dst\n\t"
10760 "MOVZB $dst, $dst\n"
10761 "done:" %}
10762 ins_encode %{
10763 __ ucomiss($src1$$XMMRegister, $src2$$Address);
10764 emit_cmpfp3(_masm, $dst$$Register);
10765 %}
10766 ins_pipe( pipe_slow );
10767 %}
10768
10769 // Spill to obtain 24-bit precision
// In 24-bit FPU mode the result is spilled to a single-precision stack slot
// so it is rounded to float precision, matching strict Java float semantics.
10770 instruct subF24_reg(stackSlotF dst, regF src1, regF src2) %{
10771 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10772 match(Set dst (SubF src1 src2));
10773
10774 format %{ "FSUB $dst,$src1 - $src2" %}
10775 opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10776 ins_encode( Push_Reg_F(src1),
10777 OpcReg_F(src2),
10778 Pop_Mem_F(dst) );
10779 ins_pipe( fpu_mem_reg_reg );
10780 %}
10781 //
10782 // This instruction does not round to 24-bits
10783 instruct subF_reg(regF dst, regF src) %{
10784 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10785 match(Set dst (SubF dst src));
10805 %}
10806 //
10807 // This instruction does not round to 24-bits
// Register-to-register x87 float add (FLD + FADDP), usable only when the
// compiler is not in 24-bit rounding mode.
10808 instruct addF_reg(regF dst, regF src) %{
10809 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10810 match(Set dst (AddF dst src));
10811
10812 format %{ "FLD $src\n\t"
10813 "FADDp $dst,ST" %}
10814 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10815 ins_encode( Push_Reg_F(src),
10816 OpcP, RegOpc(dst) );
10817 ins_pipe( fpu_reg_reg );
10818 %}
10819
10820 // Add two single precision floating point values in xmm
// SSE scalar float arithmetic: like the double (XD) family, each operation
// has reg-reg, reg-constant-table, and reg-mem (folded LoadF) variants.
10821 instruct addX_reg(regX dst, regX src) %{
10822 predicate(UseSSE>=1);
10823 match(Set dst (AddF dst src));
10824 format %{ "ADDSS $dst,$src" %}
10825 ins_encode %{
10826 __ addss($dst$$XMMRegister, $src$$XMMRegister);
10827 %}
10828 ins_pipe( pipe_slow );
10829 %}
10830
10831 instruct addX_imm(regX dst, immXF con) %{
10832 predicate(UseSSE>=1);
10833 match(Set dst (AddF dst con));
10834 format %{ "ADDSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
10835 ins_encode %{
10836 __ addss($dst$$XMMRegister, $constantaddress($con));
10837 %}
10838 ins_pipe(pipe_slow);
10839 %}
10840
10841 instruct addX_mem(regX dst, memory mem) %{
10842 predicate(UseSSE>=1);
10843 match(Set dst (AddF dst (LoadF mem)));
10844 format %{ "ADDSS $dst,$mem" %}
10845 ins_encode %{
10846 __ addss($dst$$XMMRegister, $mem$$Address);
10847 %}
10848 ins_pipe( pipe_slow );
10849 %}
10850
10851 // Subtract two single precision floating point values in xmm
10852 instruct subX_reg(regX dst, regX src) %{
10853 predicate(UseSSE>=1);
10854 match(Set dst (SubF dst src));
10855 ins_cost(150);
10856 format %{ "SUBSS $dst,$src" %}
10857 ins_encode %{
10858 __ subss($dst$$XMMRegister, $src$$XMMRegister);
10859 %}
10860 ins_pipe( pipe_slow );
10861 %}
10862
10863 instruct subX_imm(regX dst, immXF con) %{
10864 predicate(UseSSE>=1);
10865 match(Set dst (SubF dst con));
10866 ins_cost(150);
10867 format %{ "SUBSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
10868 ins_encode %{
10869 __ subss($dst$$XMMRegister, $constantaddress($con));
10870 %}
10871 ins_pipe(pipe_slow);
10872 %}
10873
10874 instruct subX_mem(regX dst, memory mem) %{
10875 predicate(UseSSE>=1);
10876 match(Set dst (SubF dst (LoadF mem)));
10877 ins_cost(150);
10878 format %{ "SUBSS $dst,$mem" %}
10879 ins_encode %{
10880 __ subss($dst$$XMMRegister, $mem$$Address);
10881 %}
10882 ins_pipe( pipe_slow );
10883 %}
10884
10885 // Multiply two single precision floating point values in xmm
10886 instruct mulX_reg(regX dst, regX src) %{
10887 predicate(UseSSE>=1);
10888 match(Set dst (MulF dst src));
10889 format %{ "MULSS $dst,$src" %}
10890 ins_encode %{
10891 __ mulss($dst$$XMMRegister, $src$$XMMRegister);
10892 %}
10893 ins_pipe( pipe_slow );
10894 %}
10895
10896 instruct mulX_imm(regX dst, immXF con) %{
10897 predicate(UseSSE>=1);
10898 match(Set dst (MulF dst con));
10899 format %{ "MULSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
10900 ins_encode %{
10901 __ mulss($dst$$XMMRegister, $constantaddress($con));
10902 %}
10903 ins_pipe(pipe_slow);
10904 %}
10905
10906 instruct mulX_mem(regX dst, memory mem) %{
10907 predicate(UseSSE>=1);
10908 match(Set dst (MulF dst (LoadF mem)));
10909 format %{ "MULSS $dst,$mem" %}
10910 ins_encode %{
10911 __ mulss($dst$$XMMRegister, $mem$$Address);
10912 %}
10913 ins_pipe( pipe_slow );
10914 %}
10915
10916 // Divide two single precision floating point values in xmm
10917 instruct divX_reg(regX dst, regX src) %{
10918 predicate(UseSSE>=1);
10919 match(Set dst (DivF dst src));
10920 format %{ "DIVSS $dst,$src" %}
10921 ins_encode %{
10922 __ divss($dst$$XMMRegister, $src$$XMMRegister);
10923 %}
10924 ins_pipe( pipe_slow );
10925 %}
10926
10927 instruct divX_imm(regX dst, immXF con) %{
10928 predicate(UseSSE>=1);
10929 match(Set dst (DivF dst con));
10930 format %{ "DIVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
10931 ins_encode %{
10932 __ divss($dst$$XMMRegister, $constantaddress($con));
10933 %}
10934 ins_pipe(pipe_slow);
10935 %}
10936
10937 instruct divX_mem(regX dst, memory mem) %{
10938 predicate(UseSSE>=1);
10939 match(Set dst (DivF dst (LoadF mem)));
10940 format %{ "DIVSS $dst,$mem" %}
10941 ins_encode %{
10942 __ divss($dst$$XMMRegister, $mem$$Address);
10943 %}
10944 ins_pipe( pipe_slow );
10945 %}
10946
10947 // Get the square root of a single precision floating point values in xmm
// Matches the ConvD2F(SqrtD(ConvF2D ...)) sandwich that Java's double-only
// Math.sqrt produces for a float operand, so a single SQRTSS can be used
// instead of widening to double and back.
10948 instruct sqrtX_reg(regX dst, regX src) %{
10949 predicate(UseSSE>=1);
10950 match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
10951 ins_cost(150);
10952 format %{ "SQRTSS $dst,$src" %}
10953 ins_encode %{
10954 __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
10955 %}
10956 ins_pipe( pipe_slow );
10957 %}
10958 
// Same as sqrtX_reg but with the float operand loaded straight from memory.
10959 instruct sqrtX_mem(regX dst, memory mem) %{
10960 predicate(UseSSE>=1);
10961 match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF mem)))));
10962 ins_cost(150);
10963 format %{ "SQRTSS $dst,$mem" %}
10964 ins_encode %{
10965 __ sqrtss($dst$$XMMRegister, $mem$$Address);
10966 %}
10967 ins_pipe( pipe_slow );
10968 %}
10969 
10970 // Get the square root of a double precision floating point values in xmm
10971 instruct sqrtXD_reg(regXD dst, regXD src) %{
10972 predicate(UseSSE>=2);
10973 match(Set dst (SqrtD src));
10974 ins_cost(150);
10975 format %{ "SQRTSD $dst,$src" %}
10976 ins_encode %{
10977 __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
10978 %}
10979 ins_pipe( pipe_slow );
10980 %}
10981 
// Same as sqrtXD_reg but with the double operand loaded straight from memory.
10982 instruct sqrtXD_mem(regXD dst, memory mem) %{
10983 predicate(UseSSE>=2);
10984 match(Set dst (SqrtD (LoadD mem)));
10985 ins_cost(150);
10986 format %{ "SQRTSD $dst,$mem" %}
10987 ins_encode %{
10988 __ sqrtsd($dst$$XMMRegister, $mem$$Address);
10989 %}
10990 ins_pipe( pipe_slow );
10991 %}
10992
// Absolute value of a float on the x87 stack (no SSE): single FABS opcode (D9 E1).
10993 instruct absF_reg(regFPR1 dst, regFPR1 src) %{
10994 predicate(UseSSE==0);
10995 match(Set dst (AbsF src));
10996 ins_cost(100);
10997 format %{ "FABS" %}
10998 opcode(0xE1, 0xD9);
10999 ins_encode( OpcS, OpcP );
11000 ins_pipe( fpu_reg_reg );
11001 %}
11002 
// Absolute value of a float in xmm: clear the sign bit by ANDing with the
// 0x7FFFFFFF... mask kept in the statically initialized float_signmask_pool.
11003 instruct absX_reg(regX dst ) %{
11004 predicate(UseSSE>=1);
11005 match(Set dst (AbsF dst));
11006 ins_cost(150);
11007 format %{ "ANDPS $dst,[0x7FFFFFFF]\t# ABS F by sign masking" %}
11008 ins_encode %{
11009 __ andps($dst$$XMMRegister,
11010 ExternalAddress((address)float_signmask_pool));
11011 %}
11012 ins_pipe( pipe_slow );
11013 %}
11014 
// Negate a float on the x87 stack (no SSE): single FCHS opcode (D9 E0).
11015 instruct negF_reg(regFPR1 dst, regFPR1 src) %{
11016 predicate(UseSSE==0);
11017 match(Set dst (NegF src));
11018 ins_cost(100);
11019 format %{ "FCHS" %}
11020 opcode(0xE0, 0xD9);
11021 ins_encode( OpcS, OpcP );
11022 ins_pipe( fpu_reg_reg );
11023 %}
11024 
// Negate a float in xmm: flip the sign bit by XORing with the
// 0x80000000... mask kept in the statically initialized float_signflip_pool.
11025 instruct negX_reg( regX dst ) %{
11026 predicate(UseSSE>=1);
11027 match(Set dst (NegF dst));
11028 ins_cost(150);
11029 format %{ "XORPS $dst,[0x80000000]\t# CHS F by sign flipping" %}
11030 ins_encode %{
11031 __ xorps($dst$$XMMRegister,
11032 ExternalAddress((address)float_signflip_pool));
11033 %}
11034 ins_pipe( pipe_slow );
11035 %}
11036
11037 // Cisc-alternate to addF_reg
11038 // Spill to obtain 24-bit precision
// x87 add with one operand loaded from memory; the FSTP_S to a stack slot
// forces the result to be rounded to 24-bit (float) precision, which the
// select_24_bit_instr() predicate requires.
11039 instruct addF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{
11040 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11041 match(Set dst (AddF src1 (LoadF src2)));
11042 
11043 format %{ "FLD $src2\n\t"
11044 "FADD ST,$src1\n\t"
11045 "FSTP_S $dst" %}
11046 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
11047 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11048 OpcReg_F(src1),
11049 Pop_Mem_F(dst) );
11050 ins_pipe( fpu_mem_reg_mem );
11051 %}
11052 //
11053 // Cisc-alternate to addF_reg
11419
11420 // Force rounding to 24-bit precision and 6-bit exponent
// x87-only D2F: round by storing through a float-sized stack slot
// (expands to the shared roundFloat_mem_reg rule).
11421 instruct convD2F_reg(stackSlotF dst, regD src) %{
11422 predicate(UseSSE==0);
11423 match(Set dst (ConvD2F src));
11424 format %{ "FST_S $dst,$src\t# F-round" %}
11425 expand %{
11426 roundFloat_mem_reg(dst,src);
11427 %}
11428 %}
11429 
11430 // Force rounding to 24-bit precision and 6-bit exponent
// UseSSE==1 D2F: source is on the x87 stack but the result must land in xmm,
// so bounce through a 4-byte scratch slot on the C stack.
11431 instruct convD2X_reg(regX dst, regD src, eFlagsReg cr) %{
11432 predicate(UseSSE==1);
11433 match(Set dst (ConvD2F src));
11434 effect( KILL cr );
11435 format %{ "SUB ESP,4\n\t"
11436 "FST_S [ESP],$src\t# F-round\n\t"
11437 "MOVSS $dst,[ESP]\n\t"
11438 "ADD ESP,4" %}
11439 ins_encode %{
11440 __ subptr(rsp, 4);
// If src is not already at the top of the x87 stack (FPR1), load it first
// and pop it after the store; otherwise store top-of-stack in place.
11441 if ($src$$reg != FPR1L_enc) {
11442 __ fld_s($src$$reg-1);
11443 __ fstp_s(Address(rsp, 0));
11444 } else {
11445 __ fst_s(Address(rsp, 0));
11446 }
11447 __ movflt($dst$$XMMRegister, Address(rsp, 0));
11448 __ addptr(rsp, 4);
11449 %}
11450 ins_pipe( pipe_slow );
11451 %}
11452 
11453 // Force rounding double precision to single precision
// Full-SSE2 D2F: a single CVTSD2SS does the rounding in xmm registers.
11454 instruct convXD2X_reg(regX dst, regXD src) %{
11455 predicate(UseSSE>=2);
11456 match(Set dst (ConvD2F src));
11457 format %{ "CVTSD2SS $dst,$src\t# F-round" %}
11458 ins_encode %{
11459 __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
11460 %}
11461 ins_pipe( pipe_slow );
11462 %}
11463
// x87-only F2D, register-to-register on the FPU stack.
11464 instruct convF2D_reg_reg(regD dst, regF src) %{
11465 predicate(UseSSE==0);
11466 match(Set dst (ConvF2D src));
11467 format %{ "FST_S $dst,$src\t# D-round" %}
11468 ins_encode( Pop_Reg_Reg_D(dst, src));
11469 ins_pipe( fpu_reg_reg );
11470 %}
11471 
// UseSSE==1 F2D into a double stack slot (expands to roundDouble_mem_reg).
11472 instruct convF2D_reg(stackSlotD dst, regF src) %{
11473 predicate(UseSSE==1);
11474 match(Set dst (ConvF2D src));
11475 format %{ "FST_D $dst,$src\t# D-round" %}
11476 expand %{
11477 roundDouble_mem_reg(dst,src);
11478 %}
11479 %}
11480 
// UseSSE==1 F2D: float lives in xmm but the double result must land on the
// x87 stack, so bounce through a 4-byte scratch slot on the C stack.
11481 instruct convX2D_reg(regD dst, regX src, eFlagsReg cr) %{
11482 predicate(UseSSE==1);
11483 match(Set dst (ConvF2D src));
11484 effect( KILL cr );
11485 format %{ "SUB ESP,4\n\t"
11486 "MOVSS [ESP] $src\n\t"
11487 "FLD_S [ESP]\n\t"
11488 "ADD ESP,4\n\t"
11489 "FSTP $dst\t# D-round" %}
11490 ins_encode %{
11491 __ subptr(rsp, 4);
11492 __ movflt(Address(rsp, 0), $src$$XMMRegister);
11493 __ fld_s(Address(rsp, 0));
11494 __ addptr(rsp, 4);
11495 __ fstp_d($dst$$reg);
11496 %}
11497 ins_pipe( pipe_slow );
11498 %}
11499 
// Full-SSE2 F2D: a single CVTSS2SD in xmm registers.
11500 instruct convX2XD_reg(regXD dst, regX src) %{
11501 predicate(UseSSE>=2);
11502 match(Set dst (ConvF2D src));
11503 format %{ "CVTSS2SD $dst,$src\t# D-round" %}
11504 ins_encode %{
11505 __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
11506 %}
11507 ins_pipe( pipe_slow );
11508 %}
11509
11510 // Convert a double to an int. If the double is a NAN, stuff a zero in instead.
// x87 path: truncate via FLDCW/FISTP, then compare against 0x80000000 (the
// value FISTP stores on overflow/NaN) and call the d2i_wrapper stub for the
// slow corner cases. Encoding details live in D2I_encoding.
11511 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
11512 predicate(UseSSE<=1);
11513 match(Set dst (ConvD2I src));
11514 effect( KILL tmp, KILL cr );
11515 format %{ "FLD $src\t# Convert double to int \n\t"
11516 "FLDCW trunc mode\n\t"
11517 "SUB ESP,4\n\t"
11518 "FISTp [ESP + #0]\n\t"
11519 "FLDCW std/24-bit mode\n\t"
11520 "POP EAX\n\t"
11521 "CMP EAX,0x80000000\n\t"
11522 "JNE,s fast\n\t"
11523 "FLD_D $src\n\t"
11524 "CALL d2i_wrapper\n"
11525 "fast:" %}
11526 ins_encode( Push_Reg_D(src), D2I_encoding(src) );
11527 ins_pipe( pipe_slow );
11528 %}
11529 
11530 // Convert a double to an int. If the double is a NAN, stuff a zero in instead.
// SSE2 path: CVTTSD2SI produces 0x80000000 for overflow/NaN; in that case the
// double is pushed back on the x87 stack and the d2i_wrapper stub fixes it up.
11531 instruct convXD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regXD src, eFlagsReg cr ) %{
11532 predicate(UseSSE>=2);
11533 match(Set dst (ConvD2I src));
11534 effect( KILL tmp, KILL cr );
11535 format %{ "CVTTSD2SI $dst, $src\n\t"
11536 "CMP $dst,0x80000000\n\t"
11537 "JNE,s fast\n\t"
11538 "SUB ESP, 8\n\t"
11539 "MOVSD [ESP], $src\n\t"
11540 "FLD_D [ESP]\n\t"
11541 "ADD ESP, 8\n\t"
11542 "CALL d2i_wrapper\n"
11543 "fast:" %}
11544 ins_encode %{
11545 Label fast;
11546 __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
11547 __ cmpl($dst$$Register, 0x80000000);
11548 __ jccb(Assembler::notEqual, fast);
// Slow path: pass the original double to the wrapper via the x87 stack.
11549 __ subptr(rsp, 8);
11550 __ movdbl(Address(rsp, 0), $src$$XMMRegister);
11551 __ fld_d(Address(rsp, 0));
11552 __ addptr(rsp, 8);
11553 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
11554 __ bind(fast);
11555 %}
11556 ins_pipe( pipe_slow );
11557 %}
11558
11559 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
11560 predicate(UseSSE<=1);
11561 match(Set dst (ConvD2L src));
11562 effect( KILL cr );
11563 format %{ "FLD $src\t# Convert double to long\n\t"
11564 "FLDCW trunc mode\n\t"
11565 "SUB ESP,8\n\t"
11566 "FISTp [ESP + #0]\n\t"
11567 "FLDCW std/24-bit mode\n\t"
11568 "POP EAX\n\t"
11569 "POP EDX\n\t"
11570 "CMP EDX,0x80000000\n\t"
11571 "JNE,s fast\n\t"
11572 "TEST EAX,EAX\n\t"
11573 "JNE,s fast\n\t"
11574 "FLD $src\n\t"
11575 "CALL d2l_wrapper\n"
11581 // XMM lacks a float/double->long conversion, so use the old FPU stack.
// SSE2 D2L: spill the xmm double, FISTP it in truncating mode, then check for
// the 0x8000000000000000 overflow/NaN pattern in EDX:EAX and call the
// d2l_wrapper stub to fix up those corner cases.
11582 instruct convXD2L_reg_reg( eADXRegL dst, regXD src, eFlagsReg cr ) %{
11583 predicate (UseSSE>=2);
11584 match(Set dst (ConvD2L src));
11585 effect( KILL cr );
11586 format %{ "SUB ESP,8\t# Convert double to long\n\t"
11587 "MOVSD [ESP],$src\n\t"
11588 "FLD_D [ESP]\n\t"
11589 "FLDCW trunc mode\n\t"
11590 "FISTp [ESP + #0]\n\t"
11591 "FLDCW std/24-bit mode\n\t"
11592 "POP EAX\n\t"
11593 "POP EDX\n\t"
11594 "CMP EDX,0x80000000\n\t"
11595 "JNE,s fast\n\t"
11596 "TEST EAX,EAX\n\t"
11597 "JNE,s fast\n\t"
11598 "SUB ESP,8\n\t"
11599 "MOVSD [ESP],$src\n\t"
11600 "FLD_D [ESP]\n\t"
11601 "ADD ESP,8\n\t"
11602 "CALL d2l_wrapper\n"
11603 "fast:" %}
11604 ins_encode %{
11605 Label fast;
11606 __ subptr(rsp, 8);
11607 __ movdbl(Address(rsp, 0), $src$$XMMRegister);
11608 __ fld_d(Address(rsp, 0));
11609 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
11610 __ fistp_d(Address(rsp, 0));
11611 // Restore the rounding mode, mask the exception
11612 if (Compile::current()->in_24_bit_fp_mode()) {
11613 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
11614 } else {
11615 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
11616 }
11617 // Load the converted long, adjust CPU stack
11618 __ pop(rax);
11619 __ pop(rdx);
11620 __ cmpl(rdx, 0x80000000);
11621 __ jccb(Assembler::notEqual, fast);
11622 __ testl(rax, rax);
11623 __ jccb(Assembler::notEqual, fast);
// Result was exactly 0x8000000000000000: possible overflow/NaN, so re-present
// the double on the x87 stack and let the d2l_wrapper stub decide.
11624 __ subptr(rsp, 8);
11625 __ movdbl(Address(rsp, 0), $src$$XMMRegister);
11626 __ fld_d(Address(rsp, 0));
11627 __ addptr(rsp, 8);
11628 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
11629 __ bind(fast);
11630 %}
11631 ins_pipe( pipe_slow );
11632 %}
11633
11634 // Convert a float to an int. Java semantics require we do complex
11635 // manglations in the corner cases. So we set the rounding mode to
11636 // 'zero', store the darned float down as an int, and reset the
11637 // rounding mode to 'nearest'. The hardware stores a flag value down
11638 // if we would overflow or converted a NaN; we check for this
11639 // and go the slow path if needed.
11640 instruct convF2I_reg_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
11641 predicate(UseSSE==0);
11642 match(Set dst (ConvF2I src));
11643 effect( KILL tmp, KILL cr );
11644 format %{ "FLD $src\t# Convert float to int \n\t"
11645 "FLDCW trunc mode\n\t"
11646 "SUB ESP,4\n\t"
11647 "FISTp [ESP + #0]\n\t"
11648 "FLDCW std/24-bit mode\n\t"
11649 "POP EAX\n\t"
11650 "CMP EAX,0x80000000\n\t"
11654 "fast:" %}
11655 // D2I_encoding works for F2I
11656 ins_encode( Push_Reg_F(src), D2I_encoding(src) );
11657 ins_pipe( pipe_slow );
11658 %}
11659
11660 // Convert a float in xmm to an int reg.
// CVTTSS2SI yields 0x80000000 on overflow/NaN; in that case the float is
// re-presented on the x87 stack and the d2i_wrapper stub computes the
// Java-correct result.
11661 instruct convX2I_reg(eAXRegI dst, eDXRegI tmp, regX src, eFlagsReg cr ) %{
11662 predicate(UseSSE>=1);
11663 match(Set dst (ConvF2I src));
11664 effect( KILL tmp, KILL cr );
11665 format %{ "CVTTSS2SI $dst, $src\n\t"
11666 "CMP $dst,0x80000000\n\t"
11667 "JNE,s fast\n\t"
11668 "SUB ESP, 4\n\t"
11669 "MOVSS [ESP], $src\n\t"
11670 "FLD [ESP]\n\t"
11671 "ADD ESP, 4\n\t"
11672 "CALL d2i_wrapper\n"
11673 "fast:" %}
11674 ins_encode %{
11675 Label fast;
11676 __ cvttss2sil($dst$$Register, $src$$XMMRegister);
11677 __ cmpl($dst$$Register, 0x80000000);
11678 __ jccb(Assembler::notEqual, fast);
11679 __ subptr(rsp, 4);
11680 __ movflt(Address(rsp, 0), $src$$XMMRegister);
11681 __ fld_s(Address(rsp, 0));
11682 __ addptr(rsp, 4);
// d2i_wrapper also handles the float case: the operand is passed widened on
// the x87 stack.
11683 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
11684 __ bind(fast);
11685 %}
11686 ins_pipe( pipe_slow );
11687 %}
11688
11689 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
11690 predicate(UseSSE==0);
11691 match(Set dst (ConvF2L src));
11692 effect( KILL cr );
11693 format %{ "FLD $src\t# Convert float to long\n\t"
11694 "FLDCW trunc mode\n\t"
11695 "SUB ESP,8\n\t"
11696 "FISTp [ESP + #0]\n\t"
11697 "FLDCW std/24-bit mode\n\t"
11698 "POP EAX\n\t"
11699 "POP EDX\n\t"
11700 "CMP EDX,0x80000000\n\t"
11701 "JNE,s fast\n\t"
11702 "TEST EAX,EAX\n\t"
11703 "JNE,s fast\n\t"
11704 "FLD $src\n\t"
11705 "CALL d2l_wrapper\n"
11715 match(Set dst (ConvF2L src));
11716 effect( KILL cr );
11717 format %{ "SUB ESP,8\t# Convert float to long\n\t"
11718 "MOVSS [ESP],$src\n\t"
11719 "FLD_S [ESP]\n\t"
11720 "FLDCW trunc mode\n\t"
11721 "FISTp [ESP + #0]\n\t"
11722 "FLDCW std/24-bit mode\n\t"
11723 "POP EAX\n\t"
11724 "POP EDX\n\t"
11725 "CMP EDX,0x80000000\n\t"
11726 "JNE,s fast\n\t"
11727 "TEST EAX,EAX\n\t"
11728 "JNE,s fast\n\t"
11729 "SUB ESP,4\t# Convert float to long\n\t"
11730 "MOVSS [ESP],$src\n\t"
11731 "FLD_S [ESP]\n\t"
11732 "ADD ESP,4\n\t"
11733 "CALL d2l_wrapper\n"
11734 "fast:" %}
11735 ins_encode %{
11736 Label fast;
11737 __ subptr(rsp, 8);
11738 __ movflt(Address(rsp, 0), $src$$XMMRegister);
11739 __ fld_s(Address(rsp, 0));
11740 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
11741 __ fistp_d(Address(rsp, 0));
11742 // Restore the rounding mode, mask the exception
11743 if (Compile::current()->in_24_bit_fp_mode()) {
11744 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
11745 } else {
11746 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
11747 }
11748 // Load the converted long, adjust CPU stack
11749 __ pop(rax);
11750 __ pop(rdx);
11751 __ cmpl(rdx, 0x80000000);
11752 __ jccb(Assembler::notEqual, fast);
11753 __ testl(rax, rax);
11754 __ jccb(Assembler::notEqual, fast);
11755 __ subptr(rsp, 4);
11756 __ movflt(Address(rsp, 0), $src$$XMMRegister);
11757 __ fld_s(Address(rsp, 0));
11758 __ addptr(rsp, 4);
11759 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
11760 __ bind(fast);
11761 %}
11762 ins_pipe( pipe_slow );
11763 %}
11764
// x87 I2D: FILD the int from its stack slot, FSTP to the destination FPU reg.
11765 instruct convI2D_reg(regD dst, stackSlotI src) %{
11766 predicate( UseSSE<=1 );
11767 match(Set dst (ConvI2D src));
11768 format %{ "FILD $src\n\t"
11769 "FSTP $dst" %}
11770 opcode(0xDB, 0x0); /* DB /0 */
11771 ins_encode(Push_Mem_I(src), Pop_Reg_D(dst));
11772 ins_pipe( fpu_reg_mem );
11773 %}
11774 
// SSE2 I2D from a GP register via CVTSI2SD (when the XMM-only variant below
// is not selected by UseXmmI2D).
11775 instruct convI2XD_reg(regXD dst, eRegI src) %{
11776 predicate( UseSSE>=2 && !UseXmmI2D );
11777 match(Set dst (ConvI2D src));
11778 format %{ "CVTSI2SD $dst,$src" %}
11779 ins_encode %{
11780 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11781 %}
11782 ins_pipe( pipe_slow );
11783 %}
11784 
// SSE2 I2D with the int loaded straight from memory (cisc-alternate).
11785 instruct convI2XD_mem(regXD dst, memory mem) %{
11786 predicate( UseSSE>=2 );
11787 match(Set dst (ConvI2D (LoadI mem)));
11788 format %{ "CVTSI2SD $dst,$mem" %}
11789 ins_encode %{
11790 __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
11791 %}
11792 ins_pipe( pipe_slow );
11793 %}
11794 
// SSE2 I2D staying in the xmm domain: MOVD the int in, then CVTDQ2PD
// (selected by UseXmmI2D).
11795 instruct convXI2XD_reg(regXD dst, eRegI src)
11796 %{
11797 predicate( UseSSE>=2 && UseXmmI2D );
11798 match(Set dst (ConvI2D src));
11799 
11800 format %{ "MOVD $dst,$src\n\t"
11801 "CVTDQ2PD $dst,$dst\t# i2d" %}
11802 ins_encode %{
11803 __ movdl($dst$$XMMRegister, $src$$Register);
11804 __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11805 %}
11806 ins_pipe(pipe_slow); // XXX
11807 %}
11808
11809 instruct convI2D_mem(regD dst, memory mem) %{
11810 predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11811 match(Set dst (ConvI2D (LoadI mem)));
11867 ins_pipe( fpu_reg_mem );
11868 %}
11869
11870 // This instruction does not round to 24-bits
// x87 I2F with the int loaded straight from memory; only legal when the
// compilation is not in 24-bit rounding mode.
11871 instruct convI2F_mem(regF dst, memory mem) %{
11872 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11873 match(Set dst (ConvI2F (LoadI mem)));
11874 format %{ "FILD $mem\n\t"
11875 "FSTP $dst" %}
11876 opcode(0xDB); /* DB /0 */
11877 ins_encode( OpcP, RMopc_Mem(0x00,mem),
11878 Pop_Reg_F(dst));
11879 ins_pipe( fpu_reg_mem );
11880 %}
11881
11882 // Convert an int to a float in xmm; no rounding step needed.
11883 instruct convI2X_reg(regX dst, eRegI src) %{
11884 predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
11885 match(Set dst (ConvI2F src));
11886 format %{ "CVTSI2SS $dst, $src" %}
11887 ins_encode %{
11888 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11889 %}
11890 ins_pipe( pipe_slow );
11891 %}
11892 
// SSE2 I2F staying in the xmm domain: MOVD the int in, then CVTDQ2PS
// (selected by UseXmmI2F).
11893 instruct convXI2X_reg(regX dst, eRegI src)
11894 %{
11895 predicate( UseSSE>=2 && UseXmmI2F );
11896 match(Set dst (ConvI2F src));
11897 
11898 format %{ "MOVD $dst,$src\n\t"
11899 "CVTDQ2PS $dst,$dst\t# i2f" %}
11900 ins_encode %{
11901 __ movdl($dst$$XMMRegister, $src$$Register);
11902 __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11903 %}
11904 ins_pipe(pipe_slow); // XXX
11905 %}
11906
11907 instruct convI2L_reg( eRegL dst, eRegI src, eFlagsReg cr) %{
11908 match(Set dst (ConvI2L src));
11909 effect(KILL cr);
11993 "FSTP_S $dst\t# F-round" %}
11994 opcode(0xDF, 0x5); /* DF /5 */
11995 ins_encode(convert_long_double(src), Pop_Mem_F(dst));
11996 ins_pipe( pipe_slow );
11997 %}
11998
// L2I: just copy the low 32-bit half of the long register pair.
11999 instruct convL2I_reg( eRegI dst, eRegL src ) %{
12000 match(Set dst (ConvL2I src));
12001 effect( DEF dst, USE src );
12002 format %{ "MOV $dst,$src.lo" %}
12003 ins_encode(enc_CopyL_Lo(dst,src));
12004 ins_pipe( ialu_reg_reg );
12005 %}
12006
12007
// Raw bit move of a float stack slot into a GP register (Float.floatToRawIntBits
// style reinterpretation; plain 32-bit load, no conversion).
12008 instruct MoveF2I_stack_reg(eRegI dst, stackSlotF src) %{
12009 match(Set dst (MoveF2I src));
12010 effect( DEF dst, USE src );
12011 ins_cost(100);
12012 format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
12013 ins_encode %{
12014 __ movl($dst$$Register, Address(rsp, $src$$disp));
12015 %}
12016 ins_pipe( ialu_reg_mem );
12017 %}
12018 
// Raw bit move of an x87 float register into an int stack slot (no SSE).
12019 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
12020 predicate(UseSSE==0);
12021 match(Set dst (MoveF2I src));
12022 effect( DEF dst, USE src );
12023 
12024 ins_cost(125);
12025 format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
12026 ins_encode( Pop_Mem_Reg_F(dst, src) );
12027 ins_pipe( fpu_mem_reg );
12028 %}
12029 
// Raw bit move of an xmm float into an int stack slot.
12030 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regX src) %{
12031 predicate(UseSSE>=1);
12032 match(Set dst (MoveF2I src));
12033 effect( DEF dst, USE src );
12034 
12035 ins_cost(95);
12036 format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
12037 ins_encode %{
12038 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
12039 %}
12040 ins_pipe( pipe_slow );
12041 %}
12042 
// Raw bit move xmm -> GP register directly via MOVD (cheapest form, SSE2).
12043 instruct MoveF2I_reg_reg_sse(eRegI dst, regX src) %{
12044 predicate(UseSSE>=2);
12045 match(Set dst (MoveF2I src));
12046 effect( DEF dst, USE src );
12047 ins_cost(85);
12048 format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
12049 ins_encode %{
12050 __ movdl($dst$$Register, $src$$XMMRegister);
12051 %}
12052 ins_pipe( pipe_slow );
12053 %}
12054
// Raw bit move of a GP register into a float stack slot (intBitsToFloat style;
// plain 32-bit store, no conversion).
12055 instruct MoveI2F_reg_stack(stackSlotF dst, eRegI src) %{
12056 match(Set dst (MoveI2F src));
12057 effect( DEF dst, USE src );
12058 
12059 ins_cost(100);
12060 format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %}
12061 ins_encode %{
12062 __ movl(Address(rsp, $dst$$disp), $src$$Register);
12063 %}
12064 ins_pipe( ialu_mem_reg );
12065 %}
12066 
12067 
// Raw bit move of an int stack slot into an x87 float register (no SSE).
12068 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
12069 predicate(UseSSE==0);
12070 match(Set dst (MoveI2F src));
12071 effect(DEF dst, USE src);
12072 
12073 ins_cost(125);
12074 format %{ "FLD_S $src\n\t"
12075 "FSTP $dst\t# MoveI2F_stack_reg" %}
12076 opcode(0xD9); /* D9 /0, FLD m32real */
12077 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
12078 Pop_Reg_F(dst) );
12079 ins_pipe( fpu_reg_mem );
12080 %}
12081 
// Raw bit move of an int stack slot into an xmm register.
12082 instruct MoveI2F_stack_reg_sse(regX dst, stackSlotI src) %{
12083 predicate(UseSSE>=1);
12084 match(Set dst (MoveI2F src));
12085 effect( DEF dst, USE src );
12086 
12087 ins_cost(95);
12088 format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
12089 ins_encode %{
12090 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
12091 %}
12092 ins_pipe( pipe_slow );
12093 %}
12094 
// Raw bit move GP register -> xmm directly via MOVD (cheapest form, SSE2).
12095 instruct MoveI2F_reg_reg_sse(regX dst, eRegI src) %{
12096 predicate(UseSSE>=2);
12097 match(Set dst (MoveI2F src));
12098 effect( DEF dst, USE src );
12099 
12100 ins_cost(85);
12101 format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
12102 ins_encode %{
12103 __ movdl($dst$$XMMRegister, $src$$Register);
12104 %}
12105 ins_pipe( pipe_slow );
12106 %}
12107
// Raw bit move of a double stack slot into a long register pair
// (doubleToRawLongBits style; two 32-bit loads for lo/hi halves).
12108 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
12109 match(Set dst (MoveD2L src));
12110 effect(DEF dst, USE src);
12111 
12112 ins_cost(250);
12113 format %{ "MOV $dst.lo,$src\n\t"
12114 "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
12115 opcode(0x8B, 0x8B);
12116 ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
12117 ins_pipe( ialu_mem_long_reg );
12118 %}
12119 
// Raw bit move of an x87 double register into a long stack slot (UseSSE<=1).
12120 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
12121 predicate(UseSSE<=1);
12122 match(Set dst (MoveD2L src));
12123 effect(DEF dst, USE src);
12124 
12125 ins_cost(125);
12126 format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
12127 ins_encode( Pop_Mem_Reg_D(dst, src) );
12128 ins_pipe( fpu_mem_reg );
12129 %}
12130 
// Raw bit move of an xmm double into a long stack slot (SSE2).
12131 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regXD src) %{
12132 predicate(UseSSE>=2);
12133 match(Set dst (MoveD2L src));
12134 effect(DEF dst, USE src);
12135 ins_cost(95);
12136 format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
12137 ins_encode %{
12138 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
12139 %}
12140 ins_pipe( pipe_slow );
12141 %}
12142 
// Raw bit move xmm double -> long register pair without touching memory:
// MOVD the low word, swap halves with PSHUFLW (0x4e = words 2,3,0,1) into a
// temp, MOVD the high word.
12143 instruct MoveD2L_reg_reg_sse(eRegL dst, regXD src, regXD tmp) %{
12144 predicate(UseSSE>=2);
12145 match(Set dst (MoveD2L src));
12146 effect(DEF dst, USE src, TEMP tmp);
12147 ins_cost(85);
12148 format %{ "MOVD $dst.lo,$src\n\t"
12149 "PSHUFLW $tmp,$src,0x4E\n\t"
12150 "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
12151 ins_encode %{
12152 __ movdl($dst$$Register, $src$$XMMRegister);
12153 __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
12154 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
12155 %}
12156 ins_pipe( pipe_slow );
12157 %}
12158
// Raw bit move of a long register pair into a double stack slot
// (longBitsToDouble style; two 32-bit stores for lo/hi halves).
12159 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
12160 match(Set dst (MoveL2D src));
12161 effect(DEF dst, USE src);
12162 
12163 ins_cost(200);
12164 format %{ "MOV $dst,$src.lo\n\t"
12165 "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
12166 opcode(0x89, 0x89);
12167 ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
12168 ins_pipe( ialu_mem_long_reg );
12169 %}
12170 
12171 
// Raw bit move of a long stack slot into an x87 double register (UseSSE<=1).
12172 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
12173 predicate(UseSSE<=1);
12174 match(Set dst (MoveL2D src));
12175 effect(DEF dst, USE src);
12176 ins_cost(125);
12177 
12178 format %{ "FLD_D $src\n\t"
12179 "FSTP $dst\t# MoveL2D_stack_reg" %}
12180 opcode(0xDD); /* DD /0, FLD m64real */
12181 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
12182 Pop_Reg_D(dst) );
12183 ins_pipe( fpu_reg_mem );
12184 %}
12185 
12186 
// Raw bit move long stack slot -> xmm with MOVSD, which also clears the upper
// half of the register (selected by UseXmmLoadAndClearUpper).
12187 instruct MoveL2D_stack_reg_sse(regXD dst, stackSlotL src) %{
12188 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
12189 match(Set dst (MoveL2D src));
12190 effect(DEF dst, USE src);
12191 
12192 ins_cost(95);
12193 format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %}
12194 ins_encode %{
12195 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
12196 %}
12197 ins_pipe( pipe_slow );
12198 %}
12199 
// Partial-register variant for CPUs where MOVSD's upper-clear is not desired
// (!UseXmmLoadAndClearUpper). NOTE(review): the format tag still prints
// "MoveL2D_stack_reg_sse" (MOVLPD mnemonic) — debug output only.
12200 instruct MoveL2D_stack_reg_sse_partial(regXD dst, stackSlotL src) %{
12201 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
12202 match(Set dst (MoveL2D src));
12203 effect(DEF dst, USE src);
12204 
12205 ins_cost(95);
12206 format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
12207 ins_encode %{
12208 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
12209 %}
12210 ins_pipe( pipe_slow );
12211 %}
12212 
// Raw bit move long register pair -> xmm without touching memory: MOVD each
// half, then PUNPCKLDQ to interleave them into the low 64 bits.
12213 instruct MoveL2D_reg_reg_sse(regXD dst, eRegL src, regXD tmp) %{
12214 predicate(UseSSE>=2);
12215 match(Set dst (MoveL2D src));
12216 effect(TEMP dst, USE src, TEMP tmp);
12217 ins_cost(85);
12218 format %{ "MOVD $dst,$src.lo\n\t"
12219 "MOVD $tmp,$src.hi\n\t"
12220 "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
12221 ins_encode %{
12222 __ movdl($dst$$XMMRegister, $src$$Register);
12223 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
12224 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
12225 %}
12226 ins_pipe( pipe_slow );
12227 %}
12228
12229 // Replicate scalar to packed byte (1 byte) values in xmm
// Widen the byte to words with PUNPCKLBW (pairing it with itself), then
// broadcast the low word across the low 64 bits with PSHUFLW.
12230 instruct Repl8B_reg(regXD dst, regXD src) %{
12231 predicate(UseSSE>=2);
12232 match(Set dst (Replicate8B src));
12233 format %{ "MOVDQA $dst,$src\n\t"
12234 "PUNPCKLBW $dst,$dst\n\t"
12235 "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
12236 ins_encode %{
// Skip the copy when dst and src were allocated to the same xmm register.
12237 if ($dst$$reg != $src$$reg) {
12238 __ movdqa($dst$$XMMRegister, $src$$XMMRegister);
12239 }
12240 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
12241 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
12242 %}
12243 ins_pipe( pipe_slow );
12244 %}
12245 
12246 // Replicate scalar to packed byte (1 byte) values in xmm
// Same as Repl8B_reg but the scalar starts in a GP register (MOVD it in first).
12247 instruct Repl8B_eRegI(regXD dst, eRegI src) %{
12248 predicate(UseSSE>=2);
12249 match(Set dst (Replicate8B src));
12250 format %{ "MOVD $dst,$src\n\t"
12251 "PUNPCKLBW $dst,$dst\n\t"
12252 "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
12253 ins_encode %{
12254 __ movdl($dst$$XMMRegister, $src$$Register);
12255 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
12256 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
12257 %}
12258 ins_pipe( pipe_slow );
12259 %}
12260 
12261 // Replicate scalar zero to packed byte (1 byte) values in xmm
// Zero vector: PXOR dst with itself.
12262 instruct Repl8B_immI0(regXD dst, immI0 zero) %{
12263 predicate(UseSSE>=2);
12264 match(Set dst (Replicate8B zero));
12265 format %{ "PXOR $dst,$dst\t! replicate8B" %}
12266 ins_encode %{
12267 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
12268 %}
12269 ins_pipe( fpu_reg_reg );
12270 %}
12271
12272 // Replicate scalar to packed short (2 byte) values in xmm
12273 instruct Repl4S_reg(regXD dst, regXD src) %{
12274 predicate(UseSSE>=2);
12275 match(Set dst (Replicate4S src));
12276 format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
12277 ins_encode %{
// Shuffle immediate 0x00 broadcasts word 0 across the low 4 words.
12278 __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00);
12279 %}
12280 ins_pipe( fpu_reg_reg );
12281 %}
12282 
12283 // Replicate scalar to packed short (2 byte) values in xmm
// Same as Repl4S_reg but the scalar starts in a GP register (MOVD it in first).
12284 instruct Repl4S_eRegI(regXD dst, eRegI src) %{
12285 predicate(UseSSE>=2);
12286 match(Set dst (Replicate4S src));
12287 format %{ "MOVD $dst,$src\n\t"
12288 "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
12289 ins_encode %{
12290 __ movdl($dst$$XMMRegister, $src$$Register);
12291 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
12292 %}
12293 ins_pipe( fpu_reg_reg );
12294 %}
12295 
12296 // Replicate scalar zero to packed short (2 byte) values in xmm
12297 instruct Repl4S_immI0(regXD dst, immI0 zero) %{
12298 predicate(UseSSE>=2);
12299 match(Set dst (Replicate4S zero));
12300 format %{ "PXOR $dst,$dst\t! replicate4S" %}
12301 ins_encode %{
12302 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
12303 %}
12304 ins_pipe( fpu_reg_reg );
12305 %}
12306
12307 // Replicate scalar to packed char (2 byte) values in xmm
// Identical codegen to Repl4S; chars are 16-bit too.
12308 instruct Repl4C_reg(regXD dst, regXD src) %{
12309 predicate(UseSSE>=2);
12310 match(Set dst (Replicate4C src));
12311 format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
12312 ins_encode %{
12313 __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00);
12314 %}
12315 ins_pipe( fpu_reg_reg );
12316 %}
12317 
12318 // Replicate scalar to packed char (2 byte) values in xmm
// Same but the scalar starts in a GP register (MOVD it in first).
12319 instruct Repl4C_eRegI(regXD dst, eRegI src) %{
12320 predicate(UseSSE>=2);
12321 match(Set dst (Replicate4C src));
12322 format %{ "MOVD $dst,$src\n\t"
12323 "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
12324 ins_encode %{
12325 __ movdl($dst$$XMMRegister, $src$$Register);
12326 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
12327 %}
12328 ins_pipe( fpu_reg_reg );
12329 %}
12330 
12331 // Replicate scalar zero to packed char (2 byte) values in xmm
12332 instruct Repl4C_immI0(regXD dst, immI0 zero) %{
12333 predicate(UseSSE>=2);
12334 match(Set dst (Replicate4C zero));
12335 format %{ "PXOR $dst,$dst\t! replicate4C" %}
12336 ins_encode %{
12337 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
12338 %}
12339 ins_pipe( fpu_reg_reg );
12340 %}
12341
12342 // Replicate scalar to packed integer (4 byte) values in xmm
12343 instruct Repl2I_reg(regXD dst, regXD src) %{
12344 predicate(UseSSE>=2);
12345 match(Set dst (Replicate2I src));
12346 format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
12347 ins_encode %{
// Shuffle immediate 0x00 broadcasts dword 0 to all four dword lanes.
12348 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
12349 %}
12350 ins_pipe( fpu_reg_reg );
12351 %}
12352 
12353 // Replicate scalar to packed integer (4 byte) values in xmm
// Same but the scalar starts in a GP register (MOVD it in first).
12354 instruct Repl2I_eRegI(regXD dst, eRegI src) %{
12355 predicate(UseSSE>=2);
12356 match(Set dst (Replicate2I src));
12357 format %{ "MOVD $dst,$src\n\t"
12358 "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
12359 ins_encode %{
12360 __ movdl($dst$$XMMRegister, $src$$Register);
12361 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
12362 %}
12363 ins_pipe( fpu_reg_reg );
12364 %}
12365 
12366 // Replicate scalar zero to packed integer (4 byte) values in xmm
12367 instruct Repl2I_immI0(regXD dst, immI0 zero) %{
12368 predicate(UseSSE>=2);
12369 match(Set dst (Replicate2I zero));
12370 format %{ "PXOR $dst,$dst\t! replicate2I" %}
12371 ins_encode %{
12372 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
12373 %}
12374 ins_pipe( fpu_reg_reg );
12375 %}
12376
12377 // Replicate scalar to packed single precision floating point values in xmm
// Shuffle immediate 0xe0 = lanes (0,0,2,3): dwords 0 and 1 both get element 0,
// replicating the float across the low 64 bits.
12378 instruct Repl2F_reg(regXD dst, regXD src) %{
12379 predicate(UseSSE>=2);
12380 match(Set dst (Replicate2F src));
12381 format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
12382 ins_encode %{
12383 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0);
12384 %}
12385 ins_pipe( fpu_reg_reg );
12386 %}
12387 
12388 // Replicate scalar to packed single precision floating point values in xmm
// Same codegen, but the source is a single-float register class (regX).
12389 instruct Repl2F_regX(regXD dst, regX src) %{
12390 predicate(UseSSE>=2);
12391 match(Set dst (Replicate2F src));
12392 format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
12393 ins_encode %{
12394 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0);
12395 %}
12396 ins_pipe( fpu_reg_reg );
12397 %}
12398 
12399 // Replicate scalar to packed single precision floating point values in xmm
// Zero vector: PXOR dst with itself.
12400 instruct Repl2F_immXF0(regXD dst, immXF0 zero) %{
12401 predicate(UseSSE>=2);
12402 match(Set dst (Replicate2F zero));
12403 format %{ "PXOR $dst,$dst\t! replicate2F" %}
12404 ins_encode %{
12405 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
12406 %}
12407 ins_pipe( fpu_reg_reg );
12408 %}
12409
12410 // =======================================================================
12411 // fast clearing of an array
// ClearArray: cnt arrives in doublewords (8-byte units); SHL by 1 converts to
// 4-byte words for REP STOS with EAX zeroed. Clobbers ECX, EDI, EAX and flags.
12412 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
12413 match(Set dummy (ClearArray cnt base));
12414 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
12415 format %{ "SHL ECX,1\t# Convert doublewords to words\n\t"
12416 "XOR EAX,EAX\n\t"
12417 "REP STOS\t# store EAX into [EDI++] while ECX--" %}
12418 opcode(0,0x4);
// Raw opcode bytes: D1 /4 (SHL ECX,1), 33 C0 (XOR EAX,EAX), F3 AB (REP STOSD).
12419 ins_encode( Opcode(0xD1), RegOpc(ECX),
12420 OpcRegReg(0x33,EAX,EAX),
12421 Opcode(0xF3), Opcode(0xAB) );
12422 ins_pipe( pipe_slow );
12423 %}
12424
12425 instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
12426 eAXRegI result, regXD tmp1, eFlagsReg cr) %{
|