
src/hotspot/cpu/x86/x86.ad

1354   __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1355   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
1356   __ end_a_stub();
1357   return offset;
1358 }
1359 
1360 
1361 //=============================================================================
1362 
1363   // Float masks come from different places depending on platform.
1364 #ifdef _LP64
1365   static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
1366   static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
1367   static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
1368   static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
1369   static address vector_float_signmask() { return StubRoutines::x86::vector_float_sign_mask(); }
1370   static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip(); }
1371   static address vector_double_signmask() { return StubRoutines::x86::vector_double_sign_mask(); }
1372   static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip(); }
1373   static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
1374   static address vector_iota_indices() { return StubRoutines::x86::vector_iota_indices(); }
1375   static address vector_byte_bitset() { return StubRoutines::x86::vector_byte_bitset(); }
1376   static address vector_long_perm_mask() { return StubRoutines::x86::vector_long_perm_mask(); }
1377   static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
1378   static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
1379   static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
1380   static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
1381   static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
1382   static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
1383   static address vector_all_ones_mask() { return StubRoutines::x86::vector_all_ones_mask(); }
1384   static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
1385   static address vector_int_sizemask() { return StubRoutines::x86::vector_int_size_mask(); }
1386   static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
1387   static address vector_short_sizemask() { return StubRoutines::x86::vector_short_size_mask(); }
1388   static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
1389   static address vector_long_sizemask() { return StubRoutines::x86::vector_long_size_mask(); }
1390 #else
1391   static address float_signmask()  { return (address)float_signmask_pool; }
1392   static address float_signflip()  { return (address)float_signflip_pool; }
1393   static address double_signmask() { return (address)double_signmask_pool; }
1394   static address double_signflip() { return (address)double_signflip_pool; }


2111   while(bit_width < 32) {
2112     val |= (val << bit_width);
2113     bit_width <<= 1;
2114   }
2115   return val;
2116 }
2117 
2118 static inline jlong replicate8_imm(int con, int width) {
2119   // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
2120   assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
2121   int bit_width = width * 8;
2122   jlong val = con;
2123   val &= (((jlong) 1) << bit_width) - 1;  // mask off sign bits
2124   while(bit_width < 64) {
2125     val |= (val << bit_width);
2126     bit_width <<= 1;
2127   }
2128   return val;
2129 }
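// Worked example of the replication helpers above (illustrative only):
//   replicate8_imm(0x1, 1)  masks the constant to 0x01, then widens 8->16->32->64 bits -> 0x0101010101010101
//   replicate8_imm(-1, 2)   masks the constant to 0xFFFF, then widens 16->32->64 bits  -> 0xFFFFFFFFFFFFFFFF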
2130 
2131 
2132 #ifndef PRODUCT
2133   void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
2134     st->print("nop \t# %d bytes pad for loops and calls", _count);
2135   }
2136 #endif
2137 
2138   void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
2139     MacroAssembler _masm(&cbuf);
2140     __ nop(_count);
2141   }
2142 
2143   uint MachNopNode::size(PhaseRegAlloc*) const {
2144     return _count;
2145   }
2146 
2147 #ifndef PRODUCT
2148   void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
2149     st->print("# breakpoint");
2150   }
2151 #endif


23081     __ vpabsd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
23082     __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
23083     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
23084   %}
23085   ins_pipe( pipe_slow );
23086 %}
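// Descriptive note on the mask-narrowing tail above: vpabsd turns the 0 / -1 comparison
// lanes into 0 / 1 integers, and the two pack steps (dword->word, then word->byte) narrow
// them to the byte-per-lane layout that VectorStoreMask is expected to produce.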
23087 
23088 instruct storemask8l(vecD dst, vecZ src, rRegL scratch) %{
23089   predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 8);
23090   match(Set dst (VectorStoreMask src));
23091   effect(TEMP scratch);
23092   format %{ "vpcmpeqq k2,$src,0xFFFFFFFF\n\t"
23093            "vmovdqub $dst,k2,0x01010101\t! store mask (8L to 8B)" %}
23094   ins_encode %{
23095     int vector_len = 2;
23096     KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
23097     Assembler::ComparisonPredicate cp = Assembler::eq;
23098     __ evpcmpq(ktmp, k0, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), cp, vector_len, $scratch$$Register);
23099     // The dst is only 128-bit - thus we can do a smaller move.
23100     __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_byte_bitset()), false, 0, $scratch$$Register);
23101   %}
23102   ins_pipe( pipe_slow );
23103 %}
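// Descriptive sketch of storemask8l above: evpcmpq compares each of the 8 long lanes of
// $src against the all-bits-set constant and records the per-lane results in the hardcoded
// mask register k2; evmovdqub then uses k2 as a write mask over the 0x01 byte pattern
// (merge flag false, so non-matching lanes are presumably zeroed), leaving one 0x01 / 0x00
// byte per lane in $dst.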
23104 
23105 //-------------------------------- LOAD_IOTA_INDICES----------------------------------
23106 
23107 instruct loadcon4b(vecS dst, immI0 src, rRegI scratch) %{
23108   predicate(UseSSE > 1  && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
23109   match(Set dst (VectorLoadConst src));
23110   effect(TEMP scratch);
23111   format %{ "movdqu $dst, CONSTANT_MEMORY\t! load iota indices" %}
23112   ins_encode %{
23113       __ movdqu($dst$$XMMRegister, ExternalAddress(vector_iota_indices()), $scratch$$Register);
23114   %}
23115   ins_pipe( pipe_slow );
23116 %}
23117 
23118 instruct loadcon8b(vecD dst, immI0 src, rRegI scratch) %{
23119   predicate(UseSSE > 1  && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
23120   match(Set dst (VectorLoadConst src));
23121   effect(TEMP scratch);
23122   format %{ "movdqu $dst, CONSTANT_MEMORY\t! load iota indices" %}
23123   ins_encode %{
23124       __ movdqu($dst$$XMMRegister, ExternalAddress(vector_iota_indices()), $scratch$$Register);
23125   %}
23126   ins_pipe( pipe_slow );
23127 %}
23128 
23129 instruct loadcon16b(vecX dst, immI0 src, rRegI scratch) %{
23130   predicate(UseAVX > 0  && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
23131   match(Set dst (VectorLoadConst src));
23132   effect(TEMP scratch);
23133   format %{ "vmovdqu $dst, CONSTANT_MEMORY\t! load iota indices" %}
23134   ins_encode %{
23135      __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_iota_indices()), $scratch$$Register);
23136   %}
23137   ins_pipe( pipe_slow );
23138 %}
23139 
23140 instruct loadcon32b(vecY dst, immI0 src, rRegI scratch) %{
23141   predicate(UseAVX > 0  && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
23142   match(Set dst (VectorLoadConst src));
23143   effect(TEMP scratch);
23144   format %{ "vmovdqu $dst, CONSTANT_MEMORY\t! load iota indices" %}
23145   ins_encode %{
23146      __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_iota_indices()), $scratch$$Register);
23147   %}
23148   ins_pipe( pipe_slow );
23149 %}
23150 
23151 instruct loadcon64b(vecZ dst, immI0 src, rRegL scratch) %{
23152   predicate(UseAVX > 2  && n->as_Vector()->length() == 64 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
23153   match(Set dst (VectorLoadConst src));
23154   effect(TEMP scratch);
23155   format %{ "vmovdqub $dst,k0, CONSTANT_MEMORY\t! load iota indices" %}
23156   ins_encode %{
23157      int vector_len = 2;
23158      __ evmovdqub($dst$$XMMRegister, k0, ExternalAddress(vector_iota_indices()), false, vector_len, $scratch$$Register);
23159   %}
23160   ins_pipe( pipe_slow );
23161 %}
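// Note on the loads above: vector_iota_indices is assumed here to point at a constant byte
// table 0, 1, 2, ..., so loadcon4b/8b/16b/32b/64b materialize a VectorLoadConst index vector
// by reading the first 4/8/16/32/64 bytes of that table.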
23162 
23163 //-------------------------------- LOAD_SHUFFLE ----------------------------------
23164 
23165 instruct loadshuffle8b(vecD dst, vecD src) %{
23166   predicate(UseSSE > 1  && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
23167   match(Set dst (VectorLoadShuffle src));
23168   format %{ "movdqu $dst, $src\t! load shuffle (load 8B for 8BRearrange)" %}
23169   ins_encode %{
23170      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
23171   %}
23172   ins_pipe( pipe_slow );
23173 %}
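// Descriptive note: for byte element types the shuffle vector already holds byte indices in
// the layout a pshufb-style rearrange consumes, so VectorLoadShuffle reduces to a plain
// register-to-register copy in the 8B and 16B cases here.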
23174 
23175 instruct loadshuffle16b(vecX dst, vecX src) %{
23176   predicate(UseSSE > 1 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
23177   match(Set dst (VectorLoadShuffle src));
23178   format %{ "movdqu $dst, $src\t! load shuffle (load 16B for 16BRearrange)" %}

