// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.
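//
// For example (see the definitions below): on Windows XMM6 is declared
// ( SOC, SOE, ... ) -- caller-save as far as the register allocator is
// concerned, but save-on-entry under the native C convention -- while on
// Linux the same register is ( SOC, SOC, ... ), caller-save on both sides.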

// XMM registers.  256-bit registers or 8 words each, labeled (a)-h.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// XMM8-XMM15 must be encoded with REX (VEX for UseAVX).
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM15 preserved across function calls
//              XMM0-XMM3 might hold parameters

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next());
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next());
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next());
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next());
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next());
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next());
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next());
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next());
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next());
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next());
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next());
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next());
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next());
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next());
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next());
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next());
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next());
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next());
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next());
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()->next());
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next());
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next());
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next());
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next());
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

#ifdef _WIN64

reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next());
reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next());
reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next());
reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next());
reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next());
reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next());
reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next());
reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next());
reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next());
reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next());
reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next());
reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next());
reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next());
reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next());
reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next());
reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next());
reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next());
reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next());
reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next());
reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next());
reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next());
reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next());
reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next());
reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next());
reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next());
reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next());
reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next());
reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next());
reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next());
reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next());
reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next());
reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next());
reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next());
reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next());
reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next());
reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next());
reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next());
reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next());
reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next());
reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next());
reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

#else // _WIN64

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next());
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next());
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next());
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next());
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next());
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next());
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next());
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next());
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

#ifdef _LP64

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next());
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next());
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next());
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next());
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next());
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next());
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next());
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next());
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next());
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next());
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next());
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next());
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next());
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next());
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next());
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next());
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next());
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next());
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next());
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next());
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next());
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next());
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next());
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next());
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next());
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next());
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next());
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next());
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next());
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next());
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next());
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next());
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next());
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next());
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next()->next());

#endif // _LP64

#endif // _WIN64

#ifdef _LP64
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
#else
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64

alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
                   XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
                   XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
                   XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
                   XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
                   XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
                   XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
                   XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
#ifdef _LP64
                  ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
                   XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                   );

// flags allocation class should be last.
alloc_class chunk2(RFLAGS);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);

// Class for all float registers
reg_class float_reg(XMM0,
                    XMM1,
                    XMM2,
                    XMM3,
                    XMM4,
                    XMM5,
                    XMM6,
                    XMM7
#ifdef _LP64
                   ,XMM8,
                    XMM9,
                    XMM10,
                    XMM11,
                    XMM12,
                    XMM13,
                    XMM14,
                    XMM15
#endif
                    );

// Class for all double registers
reg_class double_reg(XMM0,  XMM0b,
                     XMM1,  XMM1b,
                     XMM2,  XMM2b,
                     XMM3,  XMM3b,
                     XMM4,  XMM4b,
                     XMM5,  XMM5b,
                     XMM6,  XMM6b,
                     XMM7,  XMM7b
#ifdef _LP64
                    ,XMM8,  XMM8b,
                     XMM9,  XMM9b,
                     XMM10, XMM10b,
                     XMM11, XMM11b,
                     XMM12, XMM12b,
                     XMM13, XMM13b,
                     XMM14, XMM14b,
                     XMM15, XMM15b
#endif
                     );

// Class for all 32bit vector registers
reg_class vectors_reg(XMM0,
                      XMM1,
                      XMM2,
                      XMM3,
                      XMM4,
                      XMM5,
                      XMM6,
                      XMM7
#ifdef _LP64
                     ,XMM8,
                      XMM9,
                      XMM10,
                      XMM11,
                      XMM12,
                      XMM13,
                      XMM14,
                      XMM15
#endif
                      );

// Class for all 64bit vector registers
reg_class vectord_reg(XMM0,  XMM0b,
                      XMM1,  XMM1b,
                      XMM2,  XMM2b,
                      XMM3,  XMM3b,
                      XMM4,  XMM4b,
                      XMM5,  XMM5b,
                      XMM6,  XMM6b,
                      XMM7,  XMM7b
#ifdef _LP64
                     ,XMM8,  XMM8b,
                      XMM9,  XMM9b,
                      XMM10, XMM10b,
                      XMM11, XMM11b,
                      XMM12, XMM12b,
                      XMM13, XMM13b,
                      XMM14, XMM14b,
                      XMM15, XMM15b
#endif
                      );

// Class for all 128bit vector registers
reg_class vectorx_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,
                      XMM1,  XMM1b,  XMM1c,  XMM1d,
                      XMM2,  XMM2b,  XMM2c,  XMM2d,
                      XMM3,  XMM3b,  XMM3c,  XMM3d,
                      XMM4,  XMM4b,  XMM4c,  XMM4d,
                      XMM5,  XMM5b,  XMM5c,  XMM5d,
                      XMM6,  XMM6b,  XMM6c,  XMM6d,
                      XMM7,  XMM7b,  XMM7c,  XMM7d
#ifdef _LP64
                     ,XMM8,  XMM8b,  XMM8c,  XMM8d,
                      XMM9,  XMM9b,  XMM9c,  XMM9d,
                      XMM10, XMM10b, XMM10c, XMM10d,
                      XMM11, XMM11b, XMM11c, XMM11d,
                      XMM12, XMM12b, XMM12c, XMM12d,
                      XMM13, XMM13b, XMM13c, XMM13d,
                      XMM14, XMM14b, XMM14c, XMM14d,
                      XMM15, XMM15b, XMM15c, XMM15d
#endif
                      );

// Class for all 256bit vector registers
reg_class vectory_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
                      XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
                      XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
                      XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
                      XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
                      XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
                      XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
                      XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
#ifdef _LP64
                     ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
                      XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
                      XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                      XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                      XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                      XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                      XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                      XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                      );

%}
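
// Note on the classes above: a Float uses only word 'a' of a register
// (e.g. XMM0 in float_reg), a Double uses the adjacent pair (XMM0, XMM0b in
// double_reg), and the vector classes grow the same way up to the full
// eight words (XMM0..XMM0h in vectory_reg) for 256-bit operands.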

source %{
// Float masks come from different places depending on platform.
#ifdef _LP64
  static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
  static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
  static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
  static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
#else
  static address float_signmask()  { return (address)float_signmask_pool; }
  static address float_signflip()  { return (address)float_signflip_pool; }
  static address double_signmask() { return (address)double_signmask_pool; }
  static address double_signflip() { return (address)double_signflip_pool; }
#endif

// Map Types to machine register types
const int Matcher::base2reg[Type::lastype] = {
  Node::NotAMachineReg,0,0, Op_RegI, Op_RegL, 0, Op_RegN,
  Node::NotAMachineReg, Node::NotAMachineReg, /* tuple, array */
  Op_VecS, Op_VecD, Op_VecX, Op_VecY, /* Vectors */
  Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, /* the pointers */
  0, 0/*abio*/,
  Op_RegP /* Return address */, 0, /* the memories */
  Op_RegF, Op_RegF, Op_RegF, Op_RegD, Op_RegD, Op_RegD,
  0  /*bottom*/
};

// Max vector size in bytes. 0 if not supported.
const int Matcher::vector_width_in_bytes(BasicType bt) {
  assert(is_java_primitive(bt), "only primitive type vectors");
  if (UseSSE < 2) return 0;
  // SSE2 supports 128bit vectors for all types.
  // AVX2 supports 256bit vectors for all types.
  int size = (UseAVX > 1) ? 32 : 16;
  // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
  if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
    size = 32;
  // Use flag to limit vector size.
  size = MIN2(size,(int)MaxVectorSize);
  // Minimum 2 values in vector (or 4 for bytes).
  // The cases below fall through deliberately: wider element types must
  // also satisfy the narrower minimums.
  switch (bt) {
  case T_DOUBLE:
  case T_LONG:
    if (size < 16) return 0;
  case T_FLOAT:
  case T_INT:
    if (size < 8) return 0;
  case T_BOOLEAN:
  case T_BYTE:
  case T_CHAR:
  case T_SHORT:
    if (size < 4) return 0;
    break;
  default:
    ShouldNotReachHere();
  }
  return size;
}

// Limits on vector size (number of elements) loaded into vector.
const int Matcher::max_vector_size(const BasicType bt) {
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}
const int Matcher::min_vector_size(const BasicType bt) {
  int max_size = max_vector_size(bt);
  // Min size which can be loaded into vector is 4 bytes.
  int size = (type2aelembytes(bt) == 1) ? 4 : 2;
  return MIN2(size,max_size);
}

// Vector ideal reg corresponding to specified size in bytes
const int Matcher::vector_ideal_reg(int size) {
  assert(MaxVectorSize >= size, "");
  switch(size) {
    case  4: return Op_VecS;
    case  8: return Op_VecD;
    case 16: return Op_VecX;
    case 32: return Op_VecY;
  }
  ShouldNotReachHere();
  return 0;
}

// x86 supports misaligned vectors store/load.
const bool Matcher::misaligned_vectors_ok() {
  return !AlignVector; // can be changed by flag
}
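
// Worked example for the sizing logic above: with UseAVX == 1 and
// MaxVectorSize >= 32, vector_width_in_bytes(T_FLOAT) is 32 (AVX1 has
// 256-bit FP operations) while vector_width_in_bytes(T_INT) stays 16
// (integer vectors need AVX2), so max_vector_size allows 8 floats but
// only 4 ints per vector.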

// Helper methods for MachSpillCopyNode::implementation().
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st) {
  // In 64-bit VM size calculation is very complex. Emitting instructions
  // into scratch buffer is used to get size in 64-bit VM.
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
  assert(ireg == Op_VecS || // 32bit vector
         (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
         (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
         "no non-adjacent vector moves" );
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    switch (ireg) {
    case Op_VecS: // copy whole register
    case Op_VecD:
    case Op_VecX:
      __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      break;
    case Op_VecY:
      __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      break;
    default:
      ShouldNotReachHere();
    }
    int size = __ offset() - offset;
#ifdef ASSERT
    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    assert(!do_size || size == 4, "incorrect size calculation");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    switch (ireg) {
    case Op_VecS:
    case Op_VecD:
    case Op_VecX:
      st->print("movdqu  %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    case Op_VecY:
      st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  return 4;
}

static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
                            int stack_offset, int reg, uint ireg, outputStream* st) {
  // In 64-bit VM size calculation is very complex. Emitting instructions
  // into scratch buffer is used to get size in 64-bit VM.
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecD:
        __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecX:
        __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecY:
        __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecD:
        __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecX:
        __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecY:
        __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      default:
        ShouldNotReachHere();
      }
    }
    int size = __ offset() - offset;
#ifdef ASSERT
    int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    assert(!do_size || size == (5+offset_size), "incorrect size calculation");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecD:
        st->print("movq    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecX:
        st->print("movdqu  %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecY:
        st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        st->print("movd    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecD:
        st->print("movq    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecX:
        st->print("movdqu  [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecY:
        st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      default:
        ShouldNotReachHere();
      }
    }
#endif
  }
  int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return 5+offset_size;
}
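
// Sanity check for the sizes used above: a reg-reg movdqu is F3 0F 6F /r --
// two prefix/escape bytes (or a 2-byte VEX), one opcode byte and one ModRM
// byte, i.e. 4 bytes; the memory forms add a SIB byte for the RSP base plus
// 0/1/4 displacement bytes, which is the 5+offset_size returned above.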

static inline jfloat replicate4_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 32bit.
  assert(width == 1 || width == 2, "only byte or short types here");
  int bit_width = width * 8;
  jint val = con;
  val &= (1 << bit_width) - 1;  // mask off sign bits
  while(bit_width < 32) {
    val |= (val << bit_width);
    bit_width <<= 1;
  }
  jfloat fval = *((jfloat*) &val);  // coerce to float type
  return fval;
}

static inline jdouble replicate8_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
  assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
  int bit_width = width * 8;
  jlong val = con;
  val &= (((jlong) 1) << bit_width) - 1;  // mask off sign bits
  while(bit_width < 64) {
    val |= (val << bit_width);
    bit_width <<= 1;
  }
  jdouble dval = *((jdouble*) &val);  // coerce to double type
  return dval;
}
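
// Worked example: replicate8_imm(0x1F, 1) widens the byte 0x1F into
// 0x1F1F1F1F1F1F1F1F and reinterprets those bits as a jdouble, so a
// ReplicateB of the constant 31 becomes a single 8-byte constant-table
// load (see Repl8B_imm below).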

#ifndef PRODUCT
  void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
    st->print("nop \t# %d bytes pad for loops and calls", _count);
  }
#endif

  void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
    MacroAssembler _masm(&cbuf);
    __ nop(_count);
  }

  uint MachNopNode::size(PhaseRegAlloc*) const {
    return _count;
  }

#ifndef PRODUCT
  void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
    st->print("# breakpoint");
  }
#endif

  enc_class restore_SP %{
    MacroAssembler _masm(&cbuf);
    __ movptr(rsp, rbp_mh_SP_save);
  %}

  enc_class call_epilog %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      MacroAssembler _masm(&cbuf);
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
  %}

%}


//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

// Vectors
operand vecS() %{
  constraint(ALLOC_IN_RC(vectors_reg));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

operand vecD() %{
  constraint(ALLOC_IN_RC(vectord_reg));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

operand vecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

operand vecY() %{
  constraint(ALLOC_IN_RC(vectory_reg));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}


// INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)

// ============================================================================

instruct ShouldNotReachHere() %{
  match(Halt);
  format %{ "int3\t# ShouldNotReachHere" %}
  ins_encode %{
    __ int3();
  %}
  ins_pipe(pipe_slow);
%}

// ============================================================================

instruct addF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst src));

  format %{ "addss   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_mem(regD dst, memory src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD (LoadD src)));

  format %{ "sqrtsd  $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_imm(regD dst, immD con) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD con));
  format %{ "sqrtsd  $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
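
// Matching note: C2 selects among instructs by subtree shape -- e.g. a SqrtD
// fed by a LoadD matches the memory form above, while a SqrtD of a double
// constant matches sqrtD_imm and pulls the value from the constant table --
// with ins_cost breaking ties between equally shaped rules.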

// ====================VECTOR INSTRUCTIONS=====================================

// Load vectors (4 bytes long)
instruct loadV4(vecS dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movd    $dst,$mem\t! load vector (4 bytes)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (8 bytes long)
instruct loadV8(vecD dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movq    $dst,$mem\t! load vector (8 bytes)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (16 bytes long)
instruct loadV16(vecX dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movdqu  $dst,$mem\t! load vector (16 bytes)" %}
  ins_encode %{
    __ movdqu($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (32 bytes long)
instruct loadV32(vecY dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 32);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Store vectors
instruct storeV4(memory mem, vecS src) %{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movd    $mem,$src\t! store vector (4 bytes)" %}
  ins_encode %{
    __ movdl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV8(memory mem, vecD src) %{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movq    $mem,$src\t! store vector (8 bytes)" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV16(memory mem, vecX src) %{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movdqu  $mem,$src\t! store vector (16 bytes)" %}
  ins_encode %{
    __ movdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV32(memory mem, vecY src) %{
  predicate(n->as_StoreVector()->memory_size() == 32);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
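
// Selection among the load/store rules above is purely by size: the
// predicates test LoadVector/StoreVector memory_size(), so e.g. a superword
// group of four ints (16 bytes) uses loadV16/storeV16, and the 32-byte
// forms can only appear when vector_width_in_bytes allowed a 256-bit
// vector in the first place.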
replicate4B" %} 1694 ins_encode %{ 1695 __ movdl($dst$$XMMRegister, $src$$Register); 1696 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 1697 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1698 %} 1699 ins_pipe( pipe_slow ); 1700 %} 1701 1702 instruct Repl8B(vecD dst, rRegI src) %{ 1703 predicate(n->as_Vector()->length() == 8); 1704 match(Set dst (ReplicateB src)); 1705 format %{ "movd $dst,$src\n\t" 1706 "punpcklbw $dst,$dst\n\t" 1707 "pshuflw $dst,$dst,0x00\t! replicate8B" %} 1708 ins_encode %{ 1709 __ movdl($dst$$XMMRegister, $src$$Register); 1710 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 1711 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1712 %} 1713 ins_pipe( pipe_slow ); 1714 %} 1715 1716 instruct Repl16B(vecX dst, rRegI src) %{ 1717 predicate(n->as_Vector()->length() == 16); 1718 match(Set dst (ReplicateB src)); 1719 format %{ "movd $dst,$src\n\t" 1720 "punpcklbw $dst,$dst\n\t" 1721 "pshuflw $dst,$dst,0x00\n\t" 1722 "movlhps $dst,$dst\t! replicate16B" %} 1723 ins_encode %{ 1724 __ movdl($dst$$XMMRegister, $src$$Register); 1725 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 1726 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1727 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); 1728 %} 1729 ins_pipe( pipe_slow ); 1730 %} 1731 1732 instruct Repl32B(vecY dst, rRegI src) %{ 1733 predicate(n->as_Vector()->length() == 32); 1734 match(Set dst (ReplicateB src)); 1735 format %{ "movd $dst,$src\n\t" 1736 "punpcklbw $dst,$dst\n\t" 1737 "pshuflw $dst,$dst,0x00\n\t" 1738 "movlhps $dst,$dst\n\t" 1739 "vinsertf128h $dst,$dst,$dst\t! replicate32B" %} 1740 ins_encode %{ 1741 __ movdl($dst$$XMMRegister, $src$$Register); 1742 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 1743 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1744 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); 1745 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 1746 %} 1747 ins_pipe( pipe_slow ); 1748 %} 1749 1750 // Replicate byte scalar immediate to be vector by loading from const table. 1751 instruct Repl4B_imm(vecS dst, immI con) %{ 1752 predicate(n->as_Vector()->length() == 4); 1753 match(Set dst (ReplicateB con)); 1754 format %{ "movss $dst,[$constantaddress]\t! replicate4B($con)" %} 1755 ins_encode %{ 1756 __ movflt($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1))); 1757 %} 1758 ins_pipe( pipe_slow ); 1759 %} 1760 1761 instruct Repl8B_imm(vecD dst, immI con) %{ 1762 predicate(n->as_Vector()->length() == 8); 1763 match(Set dst (ReplicateB con)); 1764 format %{ "movsd $dst,[$constantaddress]\t! replicate8B($con)" %} 1765 ins_encode %{ 1766 __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 1767 %} 1768 ins_pipe( pipe_slow ); 1769 %} 1770 1771 instruct Repl16B_imm(vecX dst, immI con) %{ 1772 predicate(n->as_Vector()->length() == 16); 1773 match(Set dst (ReplicateB con)); 1774 format %{ "movsd $dst,[$constantaddress]\t! replicate16B($con)\n\t" 1775 "movlhps $dst,$dst" %} 1776 ins_encode %{ 1777 __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 1778 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); 1779 %} 1780 ins_pipe( pipe_slow ); 1781 %} 1782 1783 instruct Repl32B_imm(vecY dst, immI con) %{ 1784 predicate(n->as_Vector()->length() == 32); 1785 match(Set dst (ReplicateB con)); 1786 format %{ "movsd $dst,[$constantaddress]\t! 

instruct Repl32B_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB con));
  format %{ "movsd   $dst,[$constantaddress]\t! replicate32B($con)\n\t"
            "movlhps $dst,$dst\n\t"
            "vinsertf128h $dst,$dst,$dst" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate byte scalar zero to be vector
instruct Repl4B_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB zero));
  format %{ "pxor    $dst,$dst\t! replicate4B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8B_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB zero));
  format %{ "pxor    $dst,$dst\t! replicate8B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16B_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB zero));
  format %{ "pxor    $dst,$dst\t! replicate16B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl32B_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB zero));
  format %{ "vxorpd  $dst,$dst,$dst\t! replicate32B zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    bool vector256 = true;
    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate short (2 byte) scalar to be vector
instruct Repl2S(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS src));
  format %{ "movd    $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate2S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS src));
  format %{ "movd    $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8S(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS src));
  format %{ "movd    $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "movlhps $dst,$dst\t! replicate8S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS src));
  format %{ "movd    $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "movlhps $dst,$dst\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate16S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
replicate16S" %} 1886 ins_encode %{ 1887 __ movdl($dst$$XMMRegister, $src$$Register); 1888 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1889 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); 1890 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 1891 %} 1892 ins_pipe( pipe_slow ); 1893 %} 1894 1895 // Replicate short (2 byte) scalar immediate to be vector by loading from const table. 1896 instruct Repl2S_imm(vecS dst, immI con) %{ 1897 predicate(n->as_Vector()->length() == 2); 1898 match(Set dst (ReplicateS con)); 1899 format %{ "movss $dst,[$constantaddress]\t! replicate2S($con)" %} 1900 ins_encode %{ 1901 __ movflt($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2))); 1902 %} 1903 ins_pipe( fpu_reg_reg ); 1904 %} 1905 1906 instruct Repl4S_imm(vecD dst, immI con) %{ 1907 predicate(n->as_Vector()->length() == 4); 1908 match(Set dst (ReplicateS con)); 1909 format %{ "movsd $dst,[$constantaddress]\t! replicate4S($con)" %} 1910 ins_encode %{ 1911 __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 1912 %} 1913 ins_pipe( fpu_reg_reg ); 1914 %} 1915 1916 instruct Repl8S_imm(vecX dst, immI con) %{ 1917 predicate(n->as_Vector()->length() == 8); 1918 match(Set dst (ReplicateS con)); 1919 format %{ "movsd $dst,[$constantaddress]\t! replicate8S($con)\n\t" 1920 "movlhps $dst,$dst" %} 1921 ins_encode %{ 1922 __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 1923 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); 1924 %} 1925 ins_pipe( pipe_slow ); 1926 %} 1927 1928 instruct Repl16S_imm(vecY dst, immI con) %{ 1929 predicate(n->as_Vector()->length() == 16); 1930 match(Set dst (ReplicateS con)); 1931 format %{ "movsd $dst,[$constantaddress]\t! replicate16S($con)\n\t" 1932 "movlhps $dst,$dst\n\t" 1933 "vinsertf128h $dst,$dst,$dst" %} 1934 ins_encode %{ 1935 __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 1936 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); 1937 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 1938 %} 1939 ins_pipe( pipe_slow ); 1940 %} 1941 1942 // Replicate short (2 byte) scalar zero to be vector 1943 instruct Repl2S_zero(vecS dst, immI0 zero) %{ 1944 predicate(n->as_Vector()->length() == 2); 1945 match(Set dst (ReplicateS zero)); 1946 format %{ "pxor $dst,$dst\t! replicate2S zero" %} 1947 ins_encode %{ 1948 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 1949 %} 1950 ins_pipe( fpu_reg_reg ); 1951 %} 1952 1953 instruct Repl4S_zero(vecD dst, immI0 zero) %{ 1954 predicate(n->as_Vector()->length() == 4); 1955 match(Set dst (ReplicateS zero)); 1956 format %{ "pxor $dst,$dst\t! replicate4S zero" %} 1957 ins_encode %{ 1958 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 1959 %} 1960 ins_pipe( fpu_reg_reg ); 1961 %} 1962 1963 instruct Repl8S_zero(vecX dst, immI0 zero) %{ 1964 predicate(n->as_Vector()->length() == 8); 1965 match(Set dst (ReplicateS zero)); 1966 format %{ "pxor $dst,$dst\t! replicate8S zero" %} 1967 ins_encode %{ 1968 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 1969 %} 1970 ins_pipe( fpu_reg_reg ); 1971 %} 1972 1973 instruct Repl16S_zero(vecY dst, immI0 zero) %{ 1974 predicate(n->as_Vector()->length() == 16); 1975 match(Set dst (ReplicateS zero)); 1976 format %{ "vxorpd $dst,$dst,$dst\t! replicate16S zero" %} 1977 ins_encode %{ 1978 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 

// Replicate char (2 byte) scalar to be vector
instruct Repl2C(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateC src));
  format %{ "movd    $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate2C" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4C(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateC src));
  format %{ "movd    $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4C" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8C(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateC src));
  format %{ "movd    $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "movlhps $dst,$dst\t! replicate8C" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16C(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateC src));
  format %{ "movd    $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "movlhps $dst,$dst\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate16C" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate char (2 byte) scalar immediate to be vector by loading from const table.
instruct Repl2C_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateC con));
  format %{ "movss   $dst,[$constantaddress]\t! replicate2C($con)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4C_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateC con));
  format %{ "movsd   $dst,[$constantaddress]\t! replicate4C($con)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8C_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateC con));
  format %{ "movsd   $dst,[$constantaddress]\t! replicate8C($con)\n\t"
            "movlhps $dst,$dst" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16C_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateC con));
  format %{ "movsd   $dst,[$constantaddress]\t! replicate16C($con)\n\t"
            "movlhps $dst,$dst\n\t"
            "vinsertf128h $dst,$dst,$dst" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
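
// Note that the ReplicateC rules mirror the ReplicateS rules instruction
// for instruction: char and short are both 16-bit lanes, so only the ideal
// opcode being matched differs.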

// Replicate char (2 byte) scalar zero to be vector
instruct Repl2C_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateC zero));
  format %{ "pxor $dst,$dst\t! replicate2C zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4C_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateC zero));
  format %{ "pxor $dst,$dst\t! replicate4C zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8C_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateC zero));
  format %{ "pxor $dst,$dst\t! replicate8C zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16C_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateC zero));
  format %{ "vxorpd $dst,$dst,$dst\t! replicate16C zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    bool vector256 = true;
    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar to be vector
instruct Repl2I(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate4I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
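
// For 4-byte elements a single pshufd with selector 0x00 (all four 2-bit
// fields pick dword 0) broadcasts the scalar across the full 128 bits, so
// the integer ladder needs one step fewer than the short/char one: no
// movlhps is required to fill the high qword.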

// Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
instruct Repl2I_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  format %{ "movsd $dst,[$constantaddress]\t! replicate2I($con)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI con));
  format %{ "movsd $dst,[$constantaddress]\t! replicate4I($con)\n\t"
            "movlhps $dst,$dst" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI con));
  format %{ "movsd $dst,[$constantaddress]\t! replicate8I($con)\n\t"
            "movlhps $dst,$dst\n\t"
            "vinsertf128h $dst,$dst,$dst" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Integer could be loaded into xmm register directly from memory.
instruct Repl2I_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI mem));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI mem));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\t! replicate4I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI mem));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate integer (4 byte) scalar zero to be vector
instruct Repl2I_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate2I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate4I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8I_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI zero));
  format %{ "vxorpd $dst,$dst,$dst\t! replicate8I zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    bool vector256 = true;
    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}
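
// The pxor/xorps/xorpd-with-self forms used by the *_zero rules are the
// standard x86 register-zeroing idiom; on most microarchitectures they are
// recognized as dependency-breaking, so no prior value of $dst is waited on.
// For 256-bit zeros only the AVX floating-point logical ops (vxorps/vxorpd)
// exist at this point, hence the vxorpd above and the notes about AVX2.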

// Replicate long (8 byte) scalar to be vector
#ifdef _LP64
instruct Repl2L(vecX dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "movlhps $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4L(vecY dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "movlhps $dst,$dst\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "movlhps $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "movlhps $dst,$dst\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64

// Replicate long (8 byte) scalar immediate to be vector by loading from const table.
instruct Repl2L_imm(vecX dst, immL con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL con));
  format %{ "movsd $dst,[$constantaddress]\t! replicate2L($con)\n\t"
            "movlhps $dst,$dst" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4L_imm(vecY dst, immL con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL con));
  format %{ "movsd $dst,[$constantaddress]\t! replicate4L($con)\n\t"
            "movlhps $dst,$dst\n\t"
            "vinsertf128h $dst,$dst,$dst" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
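
// On 32-bit (the #else branch above) a long occupies a register pair, so
// the 64-bit value is first assembled inside xmm:
//
//   movdl     dst, src.lo   // dst dword 0 = low half
//   movdl     tmp, src.hi   // tmp dword 0 = high half
//   punpckldq dst, tmp      // interleave low dwords: dst[63:0] = hi:lo
//
// after which the usual movlhps / vinsertf128h ladder replicates the qword.
// HIGH_FROM_LOW is presumably the 32-bit port's accessor for the high
// register of the pair.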

// Long could be loaded into xmm register directly from memory.
instruct Repl2L_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL mem));
  format %{ "movq $dst,$mem\n\t"
            "movlhps $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4L_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL mem));
  format %{ "movq $dst,$mem\n\t"
            "movlhps $dst,$dst\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate long (8 byte) scalar zero to be vector
instruct Repl2L_zero(vecX dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL zero));
  format %{ "pxor $dst,$dst\t! replicate2L zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4L_zero(vecY dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL zero));
  format %{ "vxorpd $dst,$dst,$dst\t! replicate4L zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    bool vector256 = true;
    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate float (4 byte) scalar to be vector
instruct Repl2F(vecD dst, regF src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\t! replicate2F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F(vecX dst, regF src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\t! replicate4F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8F(vecY dst, regF src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate float (4 byte) scalar zero to be vector
instruct Repl2F_zero(vecD dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF zero));
  format %{ "xorps $dst,$dst\t! replicate2F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F_zero(vecX dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF zero));
  format %{ "xorps $dst,$dst\t! replicate4F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8F_zero(vecY dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateF zero));
  format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %}
  ins_encode %{
    bool vector256 = true;
    __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}
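
// pshufd selector decoding for the double rules below: 0x44 is 0b01_00_01_00,
// i.e. result dwords {0,1,0,1}, which copies the low 64-bit double into both
// qword lanes with a single shuffle. (For the floats above, 0x00 selects
// dword 0 into all four lanes.)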

// Replicate double (8 byte) scalar to be vector
instruct Repl2D(vecX dst, regD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\t! replicate2D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4D(vecY dst, regD src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate double (8 byte) scalar zero to be vector
instruct Repl2D_zero(vecX dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD zero));
  format %{ "xorpd $dst,$dst\t! replicate2D zero" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4D_zero(vecY dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateD zero));
  format %{ "vxorpd $dst,$dst,$dst\t! replicate4D zero" %}
  ins_encode %{
    bool vector256 = true;
    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}
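
// Note on the choice of zeroing instruction: the float/double zero rules
// use xorps/xorpd rather than pxor, presumably to keep the result in the
// floating-point bypass domain; on microarchitectures that distinguish
// integer and FP SIMD domains, feeding a pxor result into an FP consumer
// can cost an extra bypass cycle.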