
src/cpu/x86/vm/x86.ad

1737         break;
1738       case Op_URShiftVS:
1739       case Op_RShiftVS:
1740       case Op_LShiftVS:
1741       case Op_MulVS:
1742       case Op_AddVS:
1743       case Op_SubVS:
1744         if ((vlen == 32) && (VM_Version::supports_avx512bw() == false))
1745           ret_value = false;
1746         break;
1747       case Op_CMoveVD:
1748         if (vlen != 4)
1749           ret_value  = false;
1750         break;
1751     }
1752   }
1753 
1754   return ret_value;  // By default match rules are supported.
1755 }
1756 
1757 const int Matcher::float_pressure(int default_pressure_threshold) {
1758   int float_pressure_threshold = default_pressure_threshold;
1759 #ifdef _LP64
1760   if (UseAVX > 2) {
1761     // Increase pressure threshold on machines with AVX3 which have
1762     // 2x more XMM registers.
1763     float_pressure_threshold = default_pressure_threshold * 2;
1764   }
1765 #endif
1766   return float_pressure_threshold;
1767 }
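
A quick standalone sketch of the policy above (assumption: the helper name, the
sample threshold value of 14, and the bool flag standing in for _LP64 are
illustrative only; the grounded fact is that the 64-bit VM with AVX-512 sees
xmm0..xmm31 instead of xmm0..xmm15, which is why the threshold is doubled):

  #include <cassert>

  // Mirrors Matcher::float_pressure: double the float register-pressure
  // threshold when the 64-bit VM runs with UseAVX > 2 (twice as many XMMs).
  static int float_pressure_sketch(int default_threshold, int use_avx, bool is_lp64) {
    return (is_lp64 && use_avx > 2) ? default_threshold * 2 : default_threshold;
  }

  int main() {
    assert(float_pressure_sketch(14, 2, true)  == 14);  // AVX2: 16 XMM registers
    assert(float_pressure_sketch(14, 3, true)  == 28);  // AVX-512: 32 XMM registers
    assert(float_pressure_sketch(14, 3, false) == 14);  // 32-bit VM: unchanged
    return 0;
  }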
1768 
1769 // Max vector size in bytes. 0 if not supported.
1770 const int Matcher::vector_width_in_bytes(BasicType bt) {
1771   assert(is_java_primitive(bt), "only primitive type vectors");
1772   if (UseSSE < 2) return 0;
1773   // SSE2 supports 128bit vectors for all types.
1774   // AVX2 supports 256bit vectors for all types.
1775   // AVX2/EVEX supports 512bit vectors for all types.
1776   int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
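
The size expression above encodes the AVX-level to maximum-vector-width mapping.
A standalone sketch of just that expression (assumption: the rest of the
function, truncated in this view, may further restrict the result per element
type, so this is only the starting value):

  #include <cassert>

  static int max_vector_bytes(int use_avx) {
    return (use_avx > 1) ? (1 << use_avx) * 8 : 16;  // same expression as above
  }

  int main() {
    assert(max_vector_bytes(0) == 16);  // SSE2: 128-bit XMM vectors
    assert(max_vector_bytes(1) == 16);  // AVX1: still 16 bytes here
    assert(max_vector_bytes(2) == 32);  // AVX2: 256-bit YMM vectors
    assert(max_vector_bytes(3) == 64);  // EVEX: 512-bit ZMM vectors
    return 0;
  }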


1854   // In 64-bit VM size calculation is very complex. Emitting instructions
1855   // into scratch buffer is used to get size in 64-bit VM.
1856   LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
1857   assert(ireg == Op_VecS || // 32bit vector
1858          (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
1859          (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
1860          "no non-adjacent vector moves" );
1861   if (cbuf) {
1862     MacroAssembler _masm(cbuf);
1863     int offset = __ offset();
1864     switch (ireg) {
1865     case Op_VecS: // copy whole register
1866     case Op_VecD:
1867     case Op_VecX:
1868       __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
1869       break;
1870     case Op_VecY:
1871       __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
1872       break;
1873     case Op_VecZ:
1874       __ evmovdqul(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
1875       break;
1876     default:
1877       ShouldNotReachHere();
1878     }
1879     int size = __ offset() - offset;
1880 #ifdef ASSERT
1881     // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
1882     assert(!do_size || size == 4, "incorrect size calculation");
1883 #endif
1884     return size;
1885 #ifndef PRODUCT
1886   } else if (!do_size) {
1887     switch (ireg) {
1888     case Op_VecS:
1889     case Op_VecD:
1890     case Op_VecX:
1891       st->print("movdqu  %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
1892       break;
1893     case Op_VecY:
1894     case Op_VecZ:


1909   // into scratch buffer is used to get size in 64-bit VM.
1910   LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
1911   if (cbuf) {
1912     MacroAssembler _masm(cbuf);
1913     int offset = __ offset();
1914     if (is_load) {
1915       switch (ireg) {
1916       case Op_VecS:
1917         __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
1918         break;
1919       case Op_VecD:
1920         __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
1921         break;
1922       case Op_VecX:
1923         __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
1924         break;
1925       case Op_VecY:
1926         __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
1927         break;
1928       case Op_VecZ:
1929         __ evmovdqul(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
1930         break;
1931       default:
1932         ShouldNotReachHere();
1933       }
1934     } else { // store
1935       switch (ireg) {
1936       case Op_VecS:
1937         __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
1938         break;
1939       case Op_VecD:
1940         __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
1941         break;
1942       case Op_VecX:
1943         __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
1944         break;
1945       case Op_VecY:
1946         __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
1947         break;
1948       case Op_VecZ:
1949         __ evmovdqul(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
1950         break;
1951       default:
1952         ShouldNotReachHere();
1953       }
1954     }
1955     int size = __ offset() - offset;
1956 #ifdef ASSERT
1957     int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4);
1958     // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
1959     assert(!do_size || size == (5+offset_size), "incorrect size calculation");
1960 #endif
1961     return size;
1962 #ifndef PRODUCT
1963   } else if (!do_size) {
1964     if (is_load) {
1965       switch (ireg) {
1966       case Op_VecS:
1967         st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
1968         break;
1969       case Op_VecD:
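
The ASSERT block above checks the 32-bit size model for a vector spill move: a
5-byte base (prefixes + opcode + ModRM/SIB) plus 0, 1, 4, or 6 displacement
bytes. A standalone restatement of that arithmetic (the helper name and the
sample offsets are illustrative, not taken from the webrev):

  #include <cassert>

  static int spill_disp_bytes(int stack_offset, int use_avx) {
    return (stack_offset == 0) ? 0
         : (stack_offset < 0x80) ? 1    // fits in a signed disp8
         : (use_avx > 2) ? 6 : 4;       // disp32; 2 bytes more in the UseAVX > 2 case
  }

  int main() {
    assert(5 + spill_disp_bytes(0x00,  1) == 5);   // [rsp], no displacement
    assert(5 + spill_disp_bytes(0x40,  1) == 6);   // small offset, disp8
    assert(5 + spill_disp_bytes(0x100, 1) == 9);   // large offset, disp32
    assert(5 + spill_disp_bytes(0x100, 3) == 11);  // large offset, UseAVX > 2 case
    return 0;
  }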


2069   }
2070   jdouble dval = *((jdouble*) &val);  // coerce to double type
2071   return dval;
2072 }
2073 
2074 #ifndef PRODUCT
2075   void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
2076     st->print("nop \t# %d bytes pad for loops and calls", _count);
2077   }
2078 #endif
2079 
2080   void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
2081     MacroAssembler _masm(&cbuf);
2082     __ nop(_count);
2083   }
2084 
2085   uint MachNopNode::size(PhaseRegAlloc*) const {
2086     return _count;
2087   }
2088 
2089 #ifndef PRODUCT
2090   void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
2091     st->print("# breakpoint");
2092   }
2093 #endif
2094 
2095   void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const {
2096     MacroAssembler _masm(&cbuf);
2097     __ int3();
2098   }
2099 
2100   uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
2101     return MachNode::size(ra_);
2102   }
2103 
2104 %}
2105 
2106 encode %{
2107 
2108   enc_class call_epilog %{


2155     // equal' for overflow.
2156     overflow     (0x20, "o");  // not really supported by the instruction
2157     no_overflow  (0x21, "no"); // not really supported by the instruction
2158   %}
2159 %}
2160 
2161 
2162 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)
2163 
2164 // ============================================================================
2165 
2166 instruct ShouldNotReachHere() %{
2167   match(Halt);
2168   format %{ "int3\t# ShouldNotReachHere" %}
2169   ins_encode %{
2170     __ int3();
2171   %}
2172   ins_pipe(pipe_slow);
2173 %}
2174 
2175 // ============================================================================
2176 
2177 instruct addF_reg(regF dst, regF src) %{
2178   predicate((UseSSE>=1) && (UseAVX == 0));
2179   match(Set dst (AddF dst src));
2180 
2181   format %{ "addss   $dst, $src" %}
2182   ins_cost(150);
2183   ins_encode %{
2184     __ addss($dst$$XMMRegister, $src$$XMMRegister);
2185   %}
2186   ins_pipe(pipe_slow);
2187 %}
2188 
2189 instruct addF_mem(regF dst, memory src) %{
2190   predicate((UseSSE>=1) && (UseAVX == 0));
2191   match(Set dst (AddF dst (LoadF src)));
2192 
2193   format %{ "addss   $dst, $src" %}
2194   ins_cost(150);


3030   format %{ "movdqu  $dst,$mem\t! load vector (16 bytes)" %}
3031   ins_encode %{
3032     __ movdqu($dst$$XMMRegister, $mem$$Address);
3033   %}
3034   ins_pipe( pipe_slow );
3035 %}
3036 
3037 // Load vectors (32 bytes long)
3038 instruct loadV32(vecY dst, memory mem) %{
3039   predicate(n->as_LoadVector()->memory_size() == 32);
3040   match(Set dst (LoadVector mem));
3041   ins_cost(125);
3042   format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %}
3043   ins_encode %{
3044     __ vmovdqu($dst$$XMMRegister, $mem$$Address);
3045   %}
3046   ins_pipe( pipe_slow );
3047 %}
3048 
3049 // Load vectors (64 bytes long)
3050 instruct loadV64(vecZ dst, memory mem) %{
3051   predicate(n->as_LoadVector()->memory_size() == 64);
3052   match(Set dst (LoadVector mem));
3053   ins_cost(125);
3054   format %{ "vmovdqu $dst k0,$mem\t! load vector (64 bytes)" %}
3055   ins_encode %{
3056     int vector_len = 2;
3057     __ evmovdqul($dst$$XMMRegister, $mem$$Address, vector_len);
3058   %}
3059   ins_pipe( pipe_slow );
3060 %}
3061 
3062 // Store vectors
3063 instruct storeV4(memory mem, vecS src) %{
3064   predicate(n->as_StoreVector()->memory_size() == 4);
3065   match(Set mem (StoreVector mem src));
3066   ins_cost(145);
3067   format %{ "movd    $mem,$src\t! store vector (4 bytes)" %}
3068   ins_encode %{
3069     __ movdl($mem$$Address, $src$$XMMRegister);
3070   %}
3071   ins_pipe( pipe_slow );
3072 %}
3073 
3074 instruct storeV8(memory mem, vecD src) %{
3075   predicate(n->as_StoreVector()->memory_size() == 8);
3076   match(Set mem (StoreVector mem src));
3077   ins_cost(145);
3078   format %{ "movq    $mem,$src\t! store vector (8 bytes)" %}
3079   ins_encode %{
3080     __ movq($mem$$Address, $src$$XMMRegister);
3081   %}


3087   match(Set mem (StoreVector mem src));
3088   ins_cost(145);
3089   format %{ "movdqu  $mem,$src\t! store vector (16 bytes)" %}
3090   ins_encode %{
3091     __ movdqu($mem$$Address, $src$$XMMRegister);
3092   %}
3093   ins_pipe( pipe_slow );
3094 %}
3095 
3096 instruct storeV32(memory mem, vecY src) %{
3097   predicate(n->as_StoreVector()->memory_size() == 32);
3098   match(Set mem (StoreVector mem src));
3099   ins_cost(145);
3100   format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
3101   ins_encode %{
3102     __ vmovdqu($mem$$Address, $src$$XMMRegister);
3103   %}
3104   ins_pipe( pipe_slow );
3105 %}
3106 
3107 instruct storeV64(memory mem, vecZ src) %{
3108   predicate(n->as_StoreVector()->memory_size() == 64);
3109   match(Set mem (StoreVector mem src));
3110   ins_cost(145);
3111   format %{ "vmovdqu $mem k0,$src\t! store vector (64 bytes)" %}
3112   ins_encode %{
3113     int vector_len = 2;
3114     __ evmovdqul($mem$$Address, $src$$XMMRegister, vector_len);
3115   %}
3116   ins_pipe( pipe_slow );
3117 %}
3118 
3119 // ====================LEGACY REPLICATE=======================================
3120 
3121 instruct Repl4B_mem(vecS dst, memory mem) %{
3122   predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
3123   match(Set dst (ReplicateB (LoadB mem)));
3124   format %{ "punpcklbw $dst,$mem\n\t"
3125             "pshuflw $dst,$dst,0x00\t! replicate4B" %}
3126   ins_encode %{
3127     __ punpcklbw($dst$$XMMRegister, $mem$$Address);
3128     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3129   %}
3130   ins_pipe( pipe_slow );
3131 %}
3132 
3133 instruct Repl8B_mem(vecD dst, memory mem) %{
3134   predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());




1737         break;
1738       case Op_URShiftVS:
1739       case Op_RShiftVS:
1740       case Op_LShiftVS:
1741       case Op_MulVS:
1742       case Op_AddVS:
1743       case Op_SubVS:
1744         if ((vlen == 32) && (VM_Version::supports_avx512bw() == false))
1745           ret_value = false;
1746         break;
1747       case Op_CMoveVD:
1748         if (vlen != 4)
1749           ret_value  = false;
1750         break;
1751     }
1752   }
1753 
1754   return ret_value;  // By default match rules are supported.
1755 }
1756 
1757 const bool Matcher::has_predicated_vectors(void) {
1758   bool ret_value = false;
1759   switch(UseAVX) {
1760     case 0:
1761     case 1:
1762     case 2:
1763       break;
1764 
1765     case 3:
1766       ret_value = VM_Version::supports_avx512vl();
1767       break;
1768   }
1769 
1770   return ret_value;
1771 }
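
The new Matcher::has_predicated_vectors above reports predicated (masked)
vector support only for UseAVX == 3 together with AVX-512VL. A standalone
restatement of just that switch (the helper and flag names are illustrative):

  #include <cassert>

  static bool has_predicated_vectors_sketch(int use_avx, bool supports_avx512vl) {
    return use_avx == 3 && supports_avx512vl;
  }

  int main() {
    assert(!has_predicated_vectors_sketch(2, true));   // AVX2 only: not reported
    assert(!has_predicated_vectors_sketch(3, false));  // EVEX without VL: not reported
    assert( has_predicated_vectors_sketch(3, true));   // AVX-512 with VL extensions
    return 0;
  }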
1772 
1773 const int Matcher::float_pressure(int default_pressure_threshold) {
1774   int float_pressure_threshold = default_pressure_threshold;
1775 #ifdef _LP64
1776   if (UseAVX > 2) {
1777     // Increase pressure threshold on machines with AVX3 which have
1778     // 2x more XMM registers.
1779     float_pressure_threshold = default_pressure_threshold * 2;
1780   }
1781 #endif
1782   return float_pressure_threshold;
1783 }
1784 
1785 // Max vector size in bytes. 0 if not supported.
1786 const int Matcher::vector_width_in_bytes(BasicType bt) {
1787   assert(is_java_primitive(bt), "only primitive type vectors");
1788   if (UseSSE < 2) return 0;
1789   // SSE2 supports 128bit vectors for all types.
1790   // AVX2 supports 256bit vectors for all types.
1791   // AVX2/EVEX supports 512bit vectors for all types.
1792   int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;


1870   // In 64-bit VM size calculation is very complex. Emitting instructions
1871   // into scratch buffer is used to get size in 64-bit VM.
1872   LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
1873   assert(ireg == Op_VecS || // 32bit vector
1874          (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
1875          (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
1876          "no non-adjacent vector moves" );
1877   if (cbuf) {
1878     MacroAssembler _masm(cbuf);
1879     int offset = __ offset();
1880     switch (ireg) {
1881     case Op_VecS: // copy whole register
1882     case Op_VecD:
1883     case Op_VecX:
1884       __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
1885       break;
1886     case Op_VecY:
1887       __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
1888       break;
1889     case Op_VecZ:
1890       __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
1891       break;
1892     default:
1893       ShouldNotReachHere();
1894     }
1895     int size = __ offset() - offset;
1896 #ifdef ASSERT
1897     // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
1898     assert(!do_size || size == 4, "incorrect size calculation");
1899 #endif
1900     return size;
1901 #ifndef PRODUCT
1902   } else if (!do_size) {
1903     switch (ireg) {
1904     case Op_VecS:
1905     case Op_VecD:
1906     case Op_VecX:
1907       st->print("movdqu  %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
1908       break;
1909     case Op_VecY:
1910     case Op_VecZ:


1925   // into scratch buffer is used to get size in 64-bit VM.
1926   LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
1927   if (cbuf) {
1928     MacroAssembler _masm(cbuf);
1929     int offset = __ offset();
1930     if (is_load) {
1931       switch (ireg) {
1932       case Op_VecS:
1933         __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
1934         break;
1935       case Op_VecD:
1936         __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
1937         break;
1938       case Op_VecX:
1939         __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
1940         break;
1941       case Op_VecY:
1942         __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
1943         break;
1944       case Op_VecZ:
1945         __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
1946         break;
1947       default:
1948         ShouldNotReachHere();
1949       }
1950     } else { // store
1951       switch (ireg) {
1952       case Op_VecS:
1953         __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
1954         break;
1955       case Op_VecD:
1956         __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
1957         break;
1958       case Op_VecX:
1959         __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
1960         break;
1961       case Op_VecY:
1962         __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
1963         break;
1964       case Op_VecZ:
1965         __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
1966         break;
1967       default:
1968         ShouldNotReachHere();
1969       }
1970     }
1971     int size = __ offset() - offset;
1972 #ifdef ASSERT
1973     int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4);
1974     // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
1975     assert(!do_size || size == (5+offset_size), "incorrect size calculation");
1976 #endif
1977     return size;
1978 #ifndef PRODUCT
1979   } else if (!do_size) {
1980     if (is_load) {
1981       switch (ireg) {
1982       case Op_VecS:
1983         st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
1984         break;
1985       case Op_VecD:


2085   }
2086   jdouble dval = *((jdouble*) &val);  // coerce to double type
2087   return dval;
2088 }
2089 
2090 #ifndef PRODUCT
2091   void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
2092     st->print("nop \t# %d bytes pad for loops and calls", _count);
2093   }
2094 #endif
2095 
2096   void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
2097     MacroAssembler _masm(&cbuf);
2098     __ nop(_count);
2099   }
2100 
2101   uint MachNopNode::size(PhaseRegAlloc*) const {
2102     return _count;
2103   }
2104 
2105   void MachMskNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
2106     MacroAssembler _masm(&cbuf);
2107     __ restoremsk();
2108   }
2109 
2110   uint MachMskNode::size(PhaseRegAlloc* ra_) const {
2111     return MachNode::size(ra_);
2112   }
2113 
2114 #ifndef PRODUCT
2115   void MachMskNode::format(PhaseRegAlloc*, outputStream* st) const {
2116     st->print("restoremsk \t# mask restore for loops");
2117   }
2118 #endif
2119 
2120 #ifndef PRODUCT
2121   void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
2122     st->print("# breakpoint");
2123   }
2124 #endif
2125 
2126   void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const {
2127     MacroAssembler _masm(&cbuf);
2128     __ int3();
2129   }
2130 
2131   uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
2132     return MachNode::size(ra_);
2133   }
2134 
2135 %}
2136 
2137 encode %{
2138 
2139   enc_class call_epilog %{


2186     // equal' for overflow.
2187     overflow     (0x20, "o");  // not really supported by the instruction
2188     no_overflow  (0x21, "no"); // not really supported by the instruction
2189   %}
2190 %}
2191 
2192 
2193 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)
2194 
2195 // ============================================================================
2196 
2197 instruct ShouldNotReachHere() %{
2198   match(Halt);
2199   format %{ "int3\t# ShouldNotReachHere" %}
2200   ins_encode %{
2201     __ int3();
2202   %}
2203   ins_pipe(pipe_slow);
2204 %}
2205 
2206 // =================================EVEX special===============================
2207 
2208 instruct set_mask(rRegI dst, rRegI src) %{
2209   predicate(VM_Version::supports_avx512vl());
2210   match(Set dst (MaskCreateI src));
2211   effect(TEMP dst);
2212   format %{ "createmsk   $dst, $src" %}
2213   ins_encode %{
2214     __ createmsk($dst$$Register, $src$$Register);
2215   %}
2216   ins_pipe(pipe_slow);
2217 %}
2218 
2219 // ============================================================================
2220 
2221 instruct addF_reg(regF dst, regF src) %{
2222   predicate((UseSSE>=1) && (UseAVX == 0));
2223   match(Set dst (AddF dst src));
2224 
2225   format %{ "addss   $dst, $src" %}
2226   ins_cost(150);
2227   ins_encode %{
2228     __ addss($dst$$XMMRegister, $src$$XMMRegister);
2229   %}
2230   ins_pipe(pipe_slow);
2231 %}
2232 
2233 instruct addF_mem(regF dst, memory src) %{
2234   predicate((UseSSE>=1) && (UseAVX == 0));
2235   match(Set dst (AddF dst (LoadF src)));
2236 
2237   format %{ "addss   $dst, $src" %}
2238   ins_cost(150);


3074   format %{ "movdqu  $dst,$mem\t! load vector (16 bytes)" %}
3075   ins_encode %{
3076     __ movdqu($dst$$XMMRegister, $mem$$Address);
3077   %}
3078   ins_pipe( pipe_slow );
3079 %}
3080 
3081 // Load vectors (32 bytes long)
3082 instruct loadV32(vecY dst, memory mem) %{
3083   predicate(n->as_LoadVector()->memory_size() == 32);
3084   match(Set dst (LoadVector mem));
3085   ins_cost(125);
3086   format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %}
3087   ins_encode %{
3088     __ vmovdqu($dst$$XMMRegister, $mem$$Address);
3089   %}
3090   ins_pipe( pipe_slow );
3091 %}
3092 
3093 // Load vectors (64 bytes long)
3094 instruct loadV64_dword(vecZ dst, memory mem) %{
3095   predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() <= 4);
3096   match(Set dst (LoadVector mem));
3097   ins_cost(125);
3098   format %{ "vmovdqul $dst k0,$mem\t! load vector (64 bytes)" %}
3099   ins_encode %{
3100     int vector_len = 2;
3101     __ evmovdqul($dst$$XMMRegister, $mem$$Address, vector_len);
3102   %}
3103   ins_pipe( pipe_slow );
3104 %}
3105 
3106 // Load vectors (64 bytes long)
3107 instruct loadV64_qword(vecZ dst, memory mem) %{
3108   predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() > 4);
3109   match(Set dst (LoadVector mem));
3110   ins_cost(125);
3111   format %{ "vmovdquq $dst k0,$mem\t! load vector (64 bytes)" %}
3112   ins_encode %{
3113     int vector_len = 2;
3114     __ evmovdquq($dst$$XMMRegister, $mem$$Address, vector_len);
3115   %}
3116   ins_pipe( pipe_slow );
3117 %}
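
The loadV64 rule is now split by element size: elements of at most 4 bytes go
through evmovdqul and wider elements through evmovdquq (the storeV64 rules
below are split the same way). A standalone sketch of that selection, assuming
only the two predicates above (the helper name is illustrative):

  #include <cassert>
  #include <cstring>

  static const char* evex_move_for(int element_size_in_bytes) {
    // <= 4 covers byte/short/int/float vectors; > 4 covers long/double vectors.
    return (element_size_in_bytes <= 4) ? "evmovdqul" : "evmovdquq";
  }

  int main() {
    assert(std::strcmp(evex_move_for(1), "evmovdqul") == 0);  // byte elements
    assert(std::strcmp(evex_move_for(4), "evmovdqul") == 0);  // int/float elements
    assert(std::strcmp(evex_move_for(8), "evmovdquq") == 0);  // long/double elements
    return 0;
  }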
3118 
3119 // Store vectors
3120 instruct storeV4(memory mem, vecS src) %{
3121   predicate(n->as_StoreVector()->memory_size() == 4);
3122   match(Set mem (StoreVector mem src));
3123   ins_cost(145);
3124   format %{ "movd    $mem,$src\t! store vector (4 bytes)" %}
3125   ins_encode %{
3126     __ movdl($mem$$Address, $src$$XMMRegister);
3127   %}
3128   ins_pipe( pipe_slow );
3129 %}
3130 
3131 instruct storeV8(memory mem, vecD src) %{
3132   predicate(n->as_StoreVector()->memory_size() == 8);
3133   match(Set mem (StoreVector mem src));
3134   ins_cost(145);
3135   format %{ "movq    $mem,$src\t! store vector (8 bytes)" %}
3136   ins_encode %{
3137     __ movq($mem$$Address, $src$$XMMRegister);
3138   %}


3144   match(Set mem (StoreVector mem src));
3145   ins_cost(145);
3146   format %{ "movdqu  $mem,$src\t! store vector (16 bytes)" %}
3147   ins_encode %{
3148     __ movdqu($mem$$Address, $src$$XMMRegister);
3149   %}
3150   ins_pipe( pipe_slow );
3151 %}
3152 
3153 instruct storeV32(memory mem, vecY src) %{
3154   predicate(n->as_StoreVector()->memory_size() == 32);
3155   match(Set mem (StoreVector mem src));
3156   ins_cost(145);
3157   format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
3158   ins_encode %{
3159     __ vmovdqu($mem$$Address, $src$$XMMRegister);
3160   %}
3161   ins_pipe( pipe_slow );
3162 %}
3163 
3164 instruct storeV64_dword(memory mem, vecZ src) %{
3165   predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() <= 4);
3166   match(Set mem (StoreVector mem src));
3167   ins_cost(145);
3168   format %{ "vmovdqul $mem k0,$src\t! store vector (64 bytes)" %}
3169   ins_encode %{
3170     int vector_len = 2;
3171     __ evmovdqul($mem$$Address, $src$$XMMRegister, vector_len);
3172   %}
3173   ins_pipe( pipe_slow );
3174 %}
3175 
3176 instruct storeV64_qword(memory mem, vecZ src) %{
3177   predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() > 4);
3178   match(Set mem (StoreVector mem src));
3179   ins_cost(145);
3180   format %{ "vmovdquq $mem k0,$src\t! store vector (64 bytes)" %}
3181   ins_encode %{
3182     int vector_len = 2;
3183     __ evmovdquq($mem$$Address, $src$$XMMRegister, vector_len);
3184   %}
3185   ins_pipe( pipe_slow );
3186 %}
3187 
3188 // ====================LEGACY REPLICATE=======================================
3189 
3190 instruct Repl4B_mem(vecS dst, memory mem) %{
3191   predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
3192   match(Set dst (ReplicateB (LoadB mem)));
3193   format %{ "punpcklbw $dst,$mem\n\t"
3194             "pshuflw $dst,$dst,0x00\t! replicate4B" %}
3195   ins_encode %{
3196     __ punpcklbw($dst$$XMMRegister, $mem$$Address);
3197     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3198   %}
3199   ins_pipe( pipe_slow );
3200 %}
3201 
3202 instruct Repl8B_mem(vecD dst, memory mem) %{
3203   predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());

