42 //
43 // SOC = Save-On-Call: The register allocator assumes that these registers
44 // can be used without saving upon entry to the method,
45 // but that they must be saved at call sites.
46 //
47 // SOE = Save-On-Entry: The register allocator assumes that these registers
48 // must be saved before using them upon entry to the
49 // method, but they do not need to be saved at call
50 // sites.
51 //
52 // AS = Always-Save: The register allocator assumes that these registers
53 // must be saved before using them upon entry to the
54 // method, & that they must be saved at call sites.
55 //
56 // Ideal Register Type is used to determine how to save & restore a
57 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
58 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
59 //
60 // The encoding number is the actual bit-pattern placed into the opcodes.
61
// XMM registers.  256-bit registers, i.e. 8 words each, labeled (a)-(h).
// Word (a) in each register holds a Float; words (a)-(b) hold a Double.
64 // The whole registers are used in SSE4.2 version intrinsics,
65 // array copy stubs and superword operations (see UseSSE42Intrinsics,
66 // UseXMMForArrayCopy and UseSuperword flags).
67 // XMM8-XMM15 must be encoded with REX (VEX for UseAVX).
68 // Linux ABI: No register preserved across function calls
69 // XMM0-XMM7 might hold parameters
70 // Windows ABI: XMM6-XMM15 preserved across function calls
71 // XMM0-XMM3 might hold parameters
72
// reg_def fields: (save-on-call status, save-on-entry status, ideal register
// type, encoding number, concrete VMReg).  Each 32-bit word (a)-(h) of a
// register gets its own reg_def and shares the register's encoding number.
// XMM0-XMM5 are SOC/SOC on every ABI: per the ABI notes above they may carry
// parameters and no ABI preserves them across calls.
reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
126
#ifdef _WIN64
// Windows x64 ABI: XMM6-XMM15 are preserved across calls (see the ABI note
// above), hence save-on-entry (SOE) here.

reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(7));

reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(7));

// XMM8-XMM15 need REX/VEX encoding (see note above); Windows is 64-bit only
// here, so they are defined unconditionally in this branch.
reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(7));

reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(7));

reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(7));

reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(7));

reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(7));

reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(7));

reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(7));

reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(7));

#else // _WIN64
// Non-Windows ABIs preserve no XMM registers across calls (see the ABI note
// above), so XMM6 and up remain plain save-on-call (SOC).

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));

#ifdef _LP64
// XMM8-XMM15 only exist in 64-bit mode (require REX/VEX encoding, see note
// above).

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));

#endif // _LP64

#endif // _WIN64
316
// Condition-code (flags) register.  The ideal-type slot is 0 and the VMReg
// is VMRegImpl::Bad() (no spillable backing register); its encoding is
// placed just past the last XMM encoding: 16 in 64-bit mode (after
// XMM0-XMM15), 8 in 32-bit mode (after XMM0-XMM7).
#ifdef _LP64
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
#else
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64
322
// All XMM register words grouped into a single allocation chunk;
// XMM8-XMM15 only exist in 64-bit mode.
alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
                   XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
                   XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
                   XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
                   XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                   XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                   XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                   XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
#ifdef _LP64
                  ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                   XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                   );
342
// flags allocation class should be last.
alloc_class chunk2(RFLAGS);

// Singleton class for condition codes (holds only the RFLAGS pseudo-register).
reg_class int_flags(RFLAGS);
348
// Class for all float registers.
// Uses only word (a) of each XMM register (a Float occupies one word).
reg_class float_reg(XMM0,
                    XMM1,
                    XMM2,
                    XMM3,
                    XMM4,
                    XMM5,
                    XMM6,
                    XMM7
#ifdef _LP64
                   ,XMM8,
                    XMM9,
                    XMM10,
                    XMM11,
                    XMM12,
                    XMM13,
                    XMM14,
                    XMM15
#endif
                    );
369
// Class for all double registers.
// Uses words (a)+(b) of each XMM register (a Double occupies two words).
reg_class double_reg(XMM0, XMM0b,
                     XMM1, XMM1b,
                     XMM2, XMM2b,
                     XMM3, XMM3b,
                     XMM4, XMM4b,
                     XMM5, XMM5b,
                     XMM6, XMM6b,
                     XMM7, XMM7b
#ifdef _LP64
                    ,XMM8, XMM8b,
                     XMM9, XMM9b,
                     XMM10, XMM10b,
                     XMM11, XMM11b,
                     XMM12, XMM12b,
                     XMM13, XMM13b,
                     XMM14, XMM14b,
                     XMM15, XMM15b
#endif
                     );
390
// Class for all 32bit vector registers (ideal type VecS).
// Uses only word (a) of each XMM register.
reg_class vectors_reg(XMM0,
                      XMM1,
                      XMM2,
                      XMM3,
                      XMM4,
                      XMM5,
                      XMM6,
                      XMM7
#ifdef _LP64
                     ,XMM8,
                      XMM9,
                      XMM10,
                      XMM11,
                      XMM12,
                      XMM13,
                      XMM14,
                      XMM15
#endif
                      );
411
// Class for all 64bit vector registers (ideal type VecD).
// Uses words (a)+(b) of each XMM register.
reg_class vectord_reg(XMM0, XMM0b,
                      XMM1, XMM1b,
                      XMM2, XMM2b,
                      XMM3, XMM3b,
                      XMM4, XMM4b,
                      XMM5, XMM5b,
                      XMM6, XMM6b,
                      XMM7, XMM7b
#ifdef _LP64
                     ,XMM8, XMM8b,
                      XMM9, XMM9b,
                      XMM10, XMM10b,
                      XMM11, XMM11b,
                      XMM12, XMM12b,
                      XMM13, XMM13b,
                      XMM14, XMM14b,
                      XMM15, XMM15b
#endif
                      );
432
// Class for all 128bit vector registers (ideal type VecX).
// Uses words (a)-(d) of each XMM register.
reg_class vectorx_reg(XMM0, XMM0b, XMM0c, XMM0d,
                      XMM1, XMM1b, XMM1c, XMM1d,
                      XMM2, XMM2b, XMM2c, XMM2d,
                      XMM3, XMM3b, XMM3c, XMM3d,
                      XMM4, XMM4b, XMM4c, XMM4d,
                      XMM5, XMM5b, XMM5c, XMM5d,
                      XMM6, XMM6b, XMM6c, XMM6d,
                      XMM7, XMM7b, XMM7c, XMM7d
#ifdef _LP64
                     ,XMM8, XMM8b, XMM8c, XMM8d,
                      XMM9, XMM9b, XMM9c, XMM9d,
                      XMM10, XMM10b, XMM10c, XMM10d,
                      XMM11, XMM11b, XMM11c, XMM11d,
                      XMM12, XMM12b, XMM12c, XMM12d,
                      XMM13, XMM13b, XMM13c, XMM13d,
                      XMM14, XMM14b, XMM14c, XMM14d,
                      XMM15, XMM15b, XMM15c, XMM15d
#endif
                      );
453
// Class for all 256bit vector registers (ideal type VecY).
// Uses all eight words (a)-(h) of each YMM register.
reg_class vectory_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
                      XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
                      XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
                      XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
                      XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                      XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                      XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                      XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
#ifdef _LP64
                     ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                      XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                      XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                      XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                      XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                      XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                      XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                      XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                      );
474
475 %}
476
477
478 //----------SOURCE BLOCK-------------------------------------------------------
479 // This is a block of C++ code which provides values, functions, and
480 // definitions necessary in the rest of the architecture description
481
482 source_hpp %{
483 // Header information of the source block.
484 // Method declarations/definitions which are used outside
485 // the ad-scope can conveniently be defined here.
486 //
487 // To keep related declarations/definitions/uses close together,
488 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
489
490 class NativeJump;
491
492 class CallStubImpl {
493
494 //--------------------------------------------------------------
606 static address float_signflip() { return (address)float_signflip_pool; }
607 static address double_signmask() { return (address)double_signmask_pool; }
608 static address double_signflip() { return (address)double_signflip_pool; }
609 #endif
610
611
// Reject match rules that exist in the AD file but are not usable with the
// current CPU feature flags (UseSSE / UseAVX / UsePopCount / cx8).
// Returns false when 'opcode' has no match rule at all, or when its rule
// requires an instruction-set extension that is not enabled.
const bool Matcher::match_rule_supported(int opcode) {
  if (!has_match_rule(opcode))
    return false;

  switch (opcode) {
    case Op_PopCountI:
    case Op_PopCountL:
      if (!UsePopCountInstruction)
        return false;
      break;
    case Op_MulVI:
      if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
        return false;
      break;
    case Op_AddReductionVL:
      if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here
        return false;
      // NOTE(review): no break here -- falls through into the SSE checks
      // below.  Appears intentional (each later check is weaker), confirm.
    case Op_AddReductionVI:
      if (UseSSE < 3) // requires at least SSE3
        return false;
      // fall through (see note above)
    case Op_MulReductionVI:
      if (UseSSE < 4) // requires at least SSE4
        return false;
      // fall through (see note above)
    case Op_AddReductionVF:
    case Op_AddReductionVD:
    case Op_MulReductionVF:
    case Op_MulReductionVD:
      if (UseSSE < 1) // requires at least SSE
        return false;
      break;
    case Op_CompareAndSwapL:
#ifdef _LP64
    case Op_CompareAndSwapP:
#endif
      if (!VM_Version::supports_cx8())
        return false;
      break;
  }

  return true;  // Per default match rules are supported.
}
653
// Max vector size in bytes. 0 if not supported.
// Determined by the CPU feature level (SSE2 -> 16 bytes, AVX/AVX2 -> 32),
// capped by the MaxVectorSize flag, and zeroed out when the resulting
// vector could not hold the minimum element count for type 'bt'.
const int Matcher::vector_width_in_bytes(BasicType bt) {
  assert(is_java_primitive(bt), "only primitive type vectors");
  if (UseSSE < 2) return 0;
  // SSE2 supports 128bit vectors for all types.
  // AVX2 supports 256bit vectors for all types.
  int size = (UseAVX > 1) ? 32 : 16;
  // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
  if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
    size = 32;
  // Use flag to limit vector size.
  size = MIN2(size,(int)MaxVectorSize);
  // Minimum 2 values in vector (or 4 for bytes).
  switch (bt) {
  case T_DOUBLE:
  case T_LONG:
    if (size < 16) return 0;
    // fall through: wider types must also satisfy the smaller minimums below
  case T_FLOAT:
  case T_INT:
    if (size < 8) return 0;
    // fall through
  case T_BOOLEAN:
  case T_BYTE:
  case T_CHAR:
  case T_SHORT:
    if (size < 4) return 0;
    break;
  default:
    ShouldNotReachHere();
  }
  return size;
685
686 // Limits on vector size (number of elements) loaded into vector.
687 const int Matcher::max_vector_size(const BasicType bt) {
688 return vector_width_in_bytes(bt)/type2aelembytes(bt);
689 }
690 const int Matcher::min_vector_size(const BasicType bt) {
691 int max_size = max_vector_size(bt);
692 // Min size which can be loaded into vector is 4 bytes.
693 int size = (type2aelembytes(bt) == 1) ? 4 : 2;
694 return MIN2(size,max_size);
695 }
696
697 // Vector ideal reg corresponding to specidied size in bytes
698 const int Matcher::vector_ideal_reg(int size) {
699 assert(MaxVectorSize >= size, "");
700 switch(size) {
701 case 4: return Op_VecS;
702 case 8: return Op_VecD;
703 case 16: return Op_VecX;
704 case 32: return Op_VecY;
705 }
706 ShouldNotReachHere();
707 return 0;
708 }
709
// Only lowest bits of xmm reg are used for vector shift count.
// Hence the smallest vector class (VecS) always suffices, regardless of the
// width 'size' of the vector being shifted.
const int Matcher::vector_shift_count_ideal_reg(int size) {
  return Op_VecS;
}
714
// x86 supports misaligned vectors store/load.
// Returns false only when the AlignVector flag forces aligned accesses.
const bool Matcher::misaligned_vectors_ok() {
  return !AlignVector; // can be changed by flag
}
719
// x86 AES instructions are compatible with SunJCE expanded
// keys, hence we do not need to pass the original key to stubs.
// Always false on this platform: stubs consume the expanded key directly.
const bool Matcher::pass_original_key_for_aes() {
  return false;
}
728 int src_hi, int dst_hi, uint ireg, outputStream* st) {
729 // In 64-bit VM size calculation is very complex. Emitting instructions
730 // into scratch buffer is used to get size in 64-bit VM.
731 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
732 assert(ireg == Op_VecS || // 32bit vector
733 (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
734 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
735 "no non-adjacent vector moves" );
736 if (cbuf) {
737 MacroAssembler _masm(cbuf);
738 int offset = __ offset();
739 switch (ireg) {
740 case Op_VecS: // copy whole register
741 case Op_VecD:
742 case Op_VecX:
743 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
744 break;
745 case Op_VecY:
746 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
747 break;
748 default:
749 ShouldNotReachHere();
750 }
751 int size = __ offset() - offset;
752 #ifdef ASSERT
753 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
754 assert(!do_size || size == 4, "incorrect size calculattion");
755 #endif
756 return size;
757 #ifndef PRODUCT
758 } else if (!do_size) {
759 switch (ireg) {
760 case Op_VecS:
761 case Op_VecD:
762 case Op_VecX:
763 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
764 break;
765 case Op_VecY:
766 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
767 break;
768 default:
769 ShouldNotReachHere();
770 }
771 #endif
772 }
773 // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
774 return 4;
775 }
776
777 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
778 int stack_offset, int reg, uint ireg, outputStream* st) {
779 // In 64-bit VM size calculation is very complex. Emitting instructions
780 // into scratch buffer is used to get size in 64-bit VM.
781 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
782 if (cbuf) {
783 MacroAssembler _masm(cbuf);
784 int offset = __ offset();
785 if (is_load) {
786 switch (ireg) {
787 case Op_VecS:
788 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
789 break;
790 case Op_VecD:
791 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
792 break;
793 case Op_VecX:
794 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
795 break;
796 case Op_VecY:
797 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
798 break;
799 default:
800 ShouldNotReachHere();
801 }
802 } else { // store
803 switch (ireg) {
804 case Op_VecS:
805 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
806 break;
807 case Op_VecD:
808 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
809 break;
810 case Op_VecX:
811 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
812 break;
813 case Op_VecY:
814 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
815 break;
816 default:
817 ShouldNotReachHere();
818 }
819 }
820 int size = __ offset() - offset;
821 #ifdef ASSERT
822 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
823 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
824 assert(!do_size || size == (5+offset_size), "incorrect size calculattion");
825 #endif
826 return size;
827 #ifndef PRODUCT
828 } else if (!do_size) {
829 if (is_load) {
830 switch (ireg) {
831 case Op_VecS:
832 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
833 break;
834 case Op_VecD:
835 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
836 break;
837 case Op_VecX:
838 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
839 break;
840 case Op_VecY:
841 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
842 break;
843 default:
844 ShouldNotReachHere();
845 }
846 } else { // store
847 switch (ireg) {
848 case Op_VecS:
849 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
850 break;
851 case Op_VecD:
852 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
853 break;
854 case Op_VecX:
855 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
856 break;
857 case Op_VecY:
858 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
859 break;
860 default:
861 ShouldNotReachHere();
862 }
863 }
864 #endif
865 }
866 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
867 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
868 return 5+offset_size;
869 }
870
871 static inline jfloat replicate4_imm(int con, int width) {
872 // Load a constant of "width" (in bytes) and replicate it to fill 32bit.
873 assert(width == 1 || width == 2, "only byte or short types here");
874 int bit_width = width * 8;
875 jint val = con;
876 val &= (1 << bit_width) - 1; // mask off sign bits
877 while(bit_width < 32) {
878 val |= (val << bit_width);
879 bit_width <<= 1;
880 }
881 jfloat fval = *((jfloat*) &val); // coerce to float type
882 return fval;
883 }
884
885 static inline jdouble replicate8_imm(int con, int width) {
886 // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
950 // Check that stack depth is unchanged: find majik cookie on stack
951 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
952 MacroAssembler _masm(&cbuf);
953 Label L;
954 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
955 __ jccb(Assembler::equal, L);
956 // Die if stack mismatch
957 __ int3();
958 __ bind(L);
959 }
960 %}
961
962 %}
963
964
965 //----------OPERANDS-----------------------------------------------------------
966 // Operand definitions must precede instruction definitions for correct parsing
967 // in the ADLC because operands constitute user defined types which are used in
968 // instruction definitions.
969
// Vectors
// Operand wrappers for vector values of each ideal width; each one simply
// constrains allocation to the matching register class defined above and
// uses the default register format/interface.

// 32-bit vector operand (ideal type VecS).
operand vecS() %{
  constraint(ALLOC_IN_RC(vectors_reg));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

// 64-bit vector operand (ideal type VecD).
operand vecD() %{
  constraint(ALLOC_IN_RC(vectord_reg));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

// 128-bit vector operand (ideal type VecX).
operand vecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

// 256-bit vector operand (ideal type VecY).
operand vecY() %{
  constraint(ALLOC_IN_RC(vectory_reg));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}
1002
1003
1004 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)
1005
1006 // ============================================================================
1007
// Halt node: emits a breakpoint (int3) for code paths that must never
// actually execute at runtime.
instruct ShouldNotReachHere() %{
  match(Halt);
  format %{ "int3\t# ShouldNotReachHere" %}
  ins_encode %{
    __ int3();
  %}
  ins_pipe(pipe_slow);
%}
1016
1017 // ============================================================================
1018
1019 instruct addF_reg(regF dst, regF src) %{
1020 predicate((UseSSE>=1) && (UseAVX == 0));
1021 match(Set dst (AddF dst src));
1022
1023 format %{ "addss $dst, $src" %}
1584 ins_pipe(pipe_slow);
1585 %}
1586
1587 instruct absF_reg(regF dst) %{
1588 predicate((UseSSE>=1) && (UseAVX == 0));
1589 match(Set dst (AbsF dst));
1590 ins_cost(150);
1591 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
1592 ins_encode %{
1593 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
1594 %}
1595 ins_pipe(pipe_slow);
1596 %}
1597
1598 instruct absF_reg_reg(regF dst, regF src) %{
1599 predicate(UseAVX > 0);
1600 match(Set dst (AbsF src));
1601 ins_cost(150);
1602 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
1603 ins_encode %{
1604 bool vector256 = false;
1605 __ vandps($dst$$XMMRegister, $src$$XMMRegister,
1606 ExternalAddress(float_signmask()), vector256);
1607 %}
1608 ins_pipe(pipe_slow);
1609 %}
1610
1611 instruct absD_reg(regD dst) %{
1612 predicate((UseSSE>=2) && (UseAVX == 0));
1613 match(Set dst (AbsD dst));
1614 ins_cost(150);
1615 format %{ "andpd $dst, [0x7fffffffffffffff]\t"
1616 "# abs double by sign masking" %}
1617 ins_encode %{
1618 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
1619 %}
1620 ins_pipe(pipe_slow);
1621 %}
1622
1623 instruct absD_reg_reg(regD dst, regD src) %{
1624 predicate(UseAVX > 0);
1625 match(Set dst (AbsD src));
1626 ins_cost(150);
1627 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
1628 "# abs double by sign masking" %}
1629 ins_encode %{
1630 bool vector256 = false;
1631 __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
1632 ExternalAddress(double_signmask()), vector256);
1633 %}
1634 ins_pipe(pipe_slow);
1635 %}
1636
// Float negation: flip the sign bit by XORing with 0x80000000.
// SSE two-operand form, works in place on $dst.
instruct negF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (NegF dst));
  ins_cost(150);
  format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

// AVX three-operand form: non-destructive, $src may differ from $dst.
instruct negF_reg_reg(regF dst, regF src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegF src));
  ins_cost(150);
  format %{ "vxorps $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    bool vector256 = false;
    __ vxorps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signflip()), vector256);
  %}
  ins_pipe(pipe_slow);
%}

// Double negation: flip the sign bit (bit 63).
instruct negD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (NegD dst));
  ins_cost(150);
  format %{ "xorpd $dst, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

// AVX three-operand form of double negation.
instruct negD_reg_reg(regD dst, regD src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegD src));
  ins_cost(150);
  format %{ "vxorpd $dst, $src, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    bool vector256 = false;
    __ vxorpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signflip()), vector256);
  %}
  ins_pipe(pipe_slow);
%}
1686
// Single-precision sqrt. The ideal graph expresses float sqrt as
// (ConvD2F (SqrtD (ConvF2D src))); match the whole pattern and compute
// it with a single sqrtss.
instruct sqrtF_reg(regF dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
1698
1699 instruct sqrtF_mem(regF dst, memory src) %{
1700 predicate(UseSSE>=1);
1701 match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));
1702
1737
1738 format %{ "sqrtsd $dst, $src" %}
1739 ins_cost(150);
1740 ins_encode %{
1741 __ sqrtsd($dst$$XMMRegister, $src$$Address);
1742 %}
1743 ins_pipe(pipe_slow);
1744 %}
1745
// Sqrt of a double constant: take sqrtsd directly from the constant
// table entry, no intermediate register load needed.
instruct sqrtD_imm(regD dst, immD con) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD con));
  format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
1756
1757
1758 // ====================VECTOR INSTRUCTIONS=====================================
1759
1760 // Load vectors (4 bytes long)
// Load a 4-byte vector with movd (zero-extends into the XMM register).
instruct loadV4(vecS dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movd $dst,$mem\t! load vector (4 bytes)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}
1771
1772 // Load vectors (8 bytes long)
1773 instruct loadV8(vecD dst, memory mem) %{
1774 predicate(n->as_LoadVector()->memory_size() == 8);
1775 match(Set dst (LoadVector mem));
1776 ins_cost(125);
1777 format %{ "movq $dst,$mem\t! load vector (8 bytes)" %}
1788 ins_cost(125);
1789 format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %}
1790 ins_encode %{
1791 __ movdqu($dst$$XMMRegister, $mem$$Address);
1792 %}
1793 ins_pipe( pipe_slow );
1794 %}
1795
1796 // Load vectors (32 bytes long)
1797 instruct loadV32(vecY dst, memory mem) %{
1798 predicate(n->as_LoadVector()->memory_size() == 32);
1799 match(Set dst (LoadVector mem));
1800 ins_cost(125);
1801 format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %}
1802 ins_encode %{
1803 __ vmovdqu($dst$$XMMRegister, $mem$$Address);
1804 %}
1805 ins_pipe( pipe_slow );
1806 %}
1807
1808 // Store vectors
// Store the low 4 bytes of a vector register with movd.
instruct storeV4(memory mem, vecS src) %{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movd $mem,$src\t! store vector (4 bytes)" %}
  ins_encode %{
    __ movdl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
1819
1820 instruct storeV8(memory mem, vecD src) %{
1821 predicate(n->as_StoreVector()->memory_size() == 8);
1822 match(Set mem (StoreVector mem src));
1823 ins_cost(145);
1824 format %{ "movq $mem,$src\t! store vector (8 bytes)" %}
1825 ins_encode %{
1826 __ movq($mem$$Address, $src$$XMMRegister);
1827 %}
1833 match(Set mem (StoreVector mem src));
1834 ins_cost(145);
1835 format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %}
1836 ins_encode %{
1837 __ movdqu($mem$$Address, $src$$XMMRegister);
1838 %}
1839 ins_pipe( pipe_slow );
1840 %}
1841
// Store a 32-byte vector with vmovdqu (unaligned 256-bit store, AVX).
instruct storeV32(memory mem, vecY src) %{
  predicate(n->as_StoreVector()->memory_size() == 32);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
1852
1853 // Replicate byte scalar to be vector
// Replicate a byte from a GPR into all 4 lanes of a 4-byte vector:
// move into XMM, pair the byte with itself (punpcklbw), then broadcast
// the resulting 16-bit unit across the low words (pshuflw).
instruct Repl4B(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}
1867
1868 instruct Repl8B(vecD dst, rRegI src) %{
1869 predicate(n->as_Vector()->length() == 8);
1870 match(Set dst (ReplicateB src));
1871 format %{ "movd $dst,$src\n\t"
1872 "punpcklbw $dst,$dst\n\t"
1896 %}
1897
// Replicate a byte across a 32-byte vector: broadcast through the low
// 8 bytes, widen to 16 bytes with punpcklqdq, then copy the low 128 bits
// into the upper half with vinserti128h.
instruct Repl32B(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate32B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate byte scalar immediate to be vector by loading from const table.
// replicate4_imm/replicate8_imm pre-replicate the immediate across the
// constant word so a single load fills the vector.
instruct Repl4B_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB con));
  format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1)));
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
  %}
  ins_pipe( pipe_slow );
%}
1943 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
1944 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
1945 %}
1946 ins_pipe( pipe_slow );
1947 %}
1948
// Replicate a byte immediate across a 32-byte vector: load the
// pre-replicated 8-byte constant, widen to 16 bytes, then copy the low
// 128 bits into the upper half.
// Fix: format said "lreplicate32B" (typo) — corrected to "replicate32B".
instruct Repl32B_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate32B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
1962
// Replicate byte scalar zero to be vector: xor the register with itself.
instruct Repl4B_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate4B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8B_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate8B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16B_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate16B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// 256-bit zero needs the AVX three-operand xor.
instruct Repl32B_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    bool vector256 = true;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}
2005
2006 // Replicate char/short (2 byte) scalar to be vector
// Replicate a 16-bit scalar into both lanes of a 4-byte vector:
// move to XMM, then broadcast the low word with pshuflw.
instruct Repl2S(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate2S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}
2018
2019 instruct Repl4S(vecD dst, rRegI src) %{
2020 predicate(n->as_Vector()->length() == 4);
2021 match(Set dst (ReplicateS src));
2041 %}
2042 ins_pipe( pipe_slow );
2043 %}
2044
// Replicate a 16-bit scalar across a 32-byte vector: broadcast through
// the low 8 bytes, widen to 16 bytes, then fill the upper 128 bits.
instruct Repl16S(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate16S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
instruct Repl2S_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS con));
  format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}
2088 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
2089 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2090 %}
2091 ins_pipe( pipe_slow );
2092 %}
2093
// Replicate a 16-bit immediate across a 32-byte vector from the
// constant table, then widen and fill the upper 128 bits.
instruct Repl16S_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate char/short (2 byte) scalar zero to be vector.
instruct Repl2S_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate2S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate4S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8S_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate8S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// 256-bit zero needs the AVX three-operand xor.
instruct Repl16S_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    bool vector256 = true;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}
2150
// Replicate integer (4 byte) scalar to be vector: move to XMM and
// broadcast the low dword with pshufd.
instruct Repl2I(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}
2163
2164 instruct Repl4I(vecX dst, rRegI src) %{
2165 predicate(n->as_Vector()->length() == 4);
2166 match(Set dst (ReplicateI src));
2170 __ movdl($dst$$XMMRegister, $src$$Register);
2171 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2172 %}
2173 ins_pipe( pipe_slow );
2174 %}
2175
// Replicate an int across a 32-byte vector: broadcast in the low 128
// bits with pshufd, then copy the low half into the upper half.
instruct Repl8I(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
2189
// Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
// replicate8_imm duplicates the int into both halves of the 8-byte constant.
instruct Repl2I_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t"
            "punpcklqdq $dst,$dst" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
2226
// Integer could be loaded into xmm register directly from memory.
// These match (ReplicateI (LoadI mem)) so the load folds into the
// replicate, avoiding a GPR round trip.
instruct Repl2I_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\t! replicate4I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
2265
2266 // Replicate integer (4 byte) scalar zero to be vector
// Replicate integer zero to a 2-lane vector by xoring the register.
// Fix: format said just "replicate2I"; every sibling *_zero instruct
// says "... zero" — made the debug output consistent.
instruct Repl2I_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate2I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}
2276
// Replicate integer zero to a 4-lane vector by xoring the register.
// Fix: removed the stray ")" from the format string ("replicate4I zero)").
instruct Repl4I_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate4I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}
2286
// Replicate integer zero to an 8-lane (256-bit) vector; needs the AVX
// three-operand xor.
instruct Repl8I_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    bool vector256 = true;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}
2298
2299 // Replicate long (8 byte) scalar to be vector
2300 #ifdef _LP64
// 64-bit VM: a long fits in one GPR, so move it to XMM with movdq and
// duplicate the quadword with punpcklqdq.
instruct Repl2L(vecX dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// As above, then copy the low 128 bits into the upper half for 4 lanes.
instruct Repl4L(vecY dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
2326 #else // _LP64
// 32-bit VM: a long lives in a GPR pair, so assemble it in XMM from the
// low and high halves (movdl + punpckldq via a temp), then duplicate the
// quadword. TEMP dst keeps the allocator from aliasing dst with src.
instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L"%}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// As above, then copy the low 128 bits into the upper half for 4 lanes.
instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
2362 #endif // _LP64
2363
// Replicate long (8 byte) scalar immediate to be vector by loading from const table.
instruct Repl2L_imm(vecX dst, immL con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// As above, then copy the low 128 bits into the upper half for 4 lanes.
instruct Repl4L_imm(vecY dst, immL con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
2390
// Long could be loaded into xmm register directly from memory.
// Matches (ReplicateL (LoadL mem)) so the load folds into the replicate.
instruct Repl2L_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "movq $dst,$mem\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4L_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "movq $dst,$mem\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
2417
// Replicate long (8 byte) scalar zero to be vector.
instruct Repl2L_zero(vecX dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL zero));
  format %{ "pxor $dst,$dst\t! replicate2L zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// 256-bit zero needs the AVX three-operand xor.
instruct Repl4L_zero(vecY dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    bool vector256 = true;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}
2440
2441 // Replicate float (4 byte) scalar to be vector
// Replicate a float into both lanes of an 8-byte vector by broadcasting
// the low dword of $src with pshufd (src is already in an XMM register).
// Fix: the format printed "$dst,$dst" but the encoding reads $src —
// the debug listing now matches the emitted instruction.
instruct Repl2F(vecD dst, regF src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\t! replicate2F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}
2451
// Replicate a float into all 4 lanes of a 16-byte vector with pshufd.
// Fix: the format printed "$dst,$dst" but the encoding reads $src —
// the debug listing now matches the emitted instruction.
instruct Repl4F(vecX dst, regF src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\t! replicate4F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}
2461
// Replicate a float across a 32-byte vector: broadcast in the low 128
// bits, then copy the low half into the upper half (vinsertf128h).
instruct Repl8F(vecY dst, regF src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
2473
// Replicate float (4 byte) scalar zero to be vector: xorps clears the
// register (uses the FP xor to stay in the float domain).
instruct Repl2F_zero(vecD dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF zero));
  format %{ "xorps $dst,$dst\t! replicate2F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F_zero(vecX dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF zero));
  format %{ "xorps $dst,$dst\t! replicate4F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// 256-bit zero uses the AVX three-operand xor.
instruct Repl8F_zero(vecY dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateF zero));
  format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %}
  ins_encode %{
    bool vector256 = true;
    __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}
2505
// Replicate double (8 bytes) scalar to be vector: pshufd with shuffle
// pattern 0x44 copies dwords {1,0,1,0}, duplicating the low quadword.
instruct Repl2D(vecX dst, regD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\t! replicate2D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
  %}
  ins_pipe( pipe_slow );
%}

// As above, then copy the low 128 bits into the upper half for 4 lanes.
instruct Repl4D(vecY dst, regD src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
2528
// Replicate double (8 byte) scalar zero to be vector.
instruct Repl2D_zero(vecX dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD zero));
  format %{ "xorpd $dst,$dst\t! replicate2D zero" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// 256-bit zero uses the AVX three-operand xor.
instruct Repl4D_zero(vecY dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateD zero));
  format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %}
  ins_encode %{
    bool vector256 = true;
    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}
2550
2551 // ====================REDUCTION ARITHMETIC=======================================
2552
2553 instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
2554 predicate(UseSSE > 2 && UseAVX == 0);
2555 match(Set dst (AddReductionVI src1 src2));
2556 effect(TEMP tmp2, TEMP tmp);
2557 format %{ "movdqu $tmp2,$src2\n\t"
2558 "phaddd $tmp2,$tmp2\n\t"
2559 "movd $tmp,$src1\n\t"
2560 "paddd $tmp,$tmp2\n\t"
2561 "movd $dst,$tmp\t! add reduction2I" %}
2562 ins_encode %{
2563 __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister);
2564 __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
2565 __ movdl($tmp$$XMMRegister, $src1$$Register);
2566 __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister);
2567 __ movdl($dst$$Register, $tmp$$XMMRegister);
2568 %}
2569 ins_pipe( pipe_slow );
2570 %}
2571
2572 instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
2573 predicate(UseAVX > 0);
2574 match(Set dst (AddReductionVI src1 src2));
2575 effect(TEMP tmp, TEMP tmp2);
2576 format %{ "vphaddd $tmp,$src2,$src2\n\t"
2577 "movd $tmp2,$src1\n\t"
2578 "vpaddd $tmp2,$tmp2,$tmp\n\t"
2579 "movd $dst,$tmp2\t! add reduction2I" %}
2580 ins_encode %{
2581 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, false);
2582 __ movdl($tmp2$$XMMRegister, $src1$$Register);
2583 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, false);
2584 __ movdl($dst$$Register, $tmp2$$XMMRegister);
2585 %}
2586 ins_pipe( pipe_slow );
2587 %}
2588
// 4-element int add reduction, SSE-only path:
//   dst = src1 + src2[0] + src2[1] + src2[2] + src2[3]
// Two phaddd passes fold the four lanes down to one dword, then the
// scalar seed src1 is added via an XMM temp.
instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseSSE > 2 && UseAVX == 0);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp2, TEMP tmp);
  format %{ "movdqu $tmp2,$src2\n\t"
            "phaddd $tmp2,$tmp2\n\t"
            "phaddd $tmp2,$tmp2\n\t"
            "movd $tmp,$src1\n\t"
            "paddd $tmp,$tmp2\n\t"
            "movd $dst,$tmp\t! add reduction4I" %}
  ins_encode %{
    __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
    __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
2609
// 4-element int add reduction, AVX path. Two vphaddd passes fold the four
// lanes, then the scalar seed src1 is added and moved to a GPR.
// NOTE(review): the second vphaddd reads $tmp2 before it is written; this
// appears intentional since only the low dword of the result is consumed
// afterwards — confirm tmp2's stale contents cannot matter.
instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vphaddd $tmp,$src2,$src2\n\t"
            "vphaddd $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! add reduction4I" %}
  ins_encode %{
    __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, false);
    __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, false);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
2628
// 8-element int add reduction over a 256-bit vector, AVX path.
// Two 256-bit vphaddd passes ('true' = 256-bit encoding) fold lanes within
// each 128-bit half; vextractf128h brings the high half down so the two
// partial sums can be combined, then the scalar seed src1 is added.
instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vphaddd $tmp,$src2,$src2\n\t"
            "vphaddd $tmp,$tmp,$tmp2\n\t"
            "vextractf128 $tmp2,$tmp\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! add reduction8I" %}
  ins_encode %{
    __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, true);
    __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, true);
    __ vextractf128h($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, false);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
2651
// 2-element float add reduction, SSE-only path:
//   dst = src1 + src2[0] + src2[1]
// Strictly ordered scalar addss chain (FP addition is not associative, so
// lane order must be preserved); pshufd 0x01 moves lane 1 into position 0.
// NOTE(review): movdqu and pshufd require SSE2 although the predicate only
// demands UseSSE >= 1 — confirm the flag setup guarantees SSE2 here.
instruct rsadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (AddReductionVF src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "movdqu $tmp,$src1\n\t"
            "addss $tmp,$src2\n\t"
            "pshufd $tmp2,$src2,0x01\n\t"
            "addss $tmp,$tmp2\n\t"
            "movdqu $dst,$tmp\t! add reduction2F" %}
  ins_encode %{
    __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
    __ addss($tmp$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
2670
2671 instruct rvadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
2755 ins_encode %{
2756 __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
2757 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
2758 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
2759 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
2760 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
2761 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
2762 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
2763 __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
2764 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
2765 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
2766 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
2767 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
2768 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
2769 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
2770 __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
2771 %}
2772 ins_pipe( pipe_slow );
2773 %}
2774
// 2-element double add reduction, SSE-only path:
//   dst = src1 + src2[0] + src2[1]
// pshufd 0xE moves the high double of src2 into the low position; the
// addsd chain keeps strict lane order (FP addition is not associative).
instruct rsadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (AddReductionVD src1 src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "movdqu $tmp,$src1\n\t"
            "addsd $tmp,$src2\n\t"
            "pshufd $dst,$src2,0xE\n\t"
            "addsd $dst,$tmp\t! add reduction2D" %}
  ins_encode %{
    __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
    __ addsd($tmp$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ addsd($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
2791
2792 instruct rvadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{
2793 predicate(UseAVX > 0);
2794 match(Set dst (AddReductionVD src1 src2));
2802 __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
2803 %}
2804 ins_pipe( pipe_slow );
2805 %}
2806
// 4-element double add reduction over a 256-bit vector, AVX path.
// Accumulates src1 with the two lanes of the low half, extracts the high
// 128 bits with vextractf128h, and accumulates its two lanes, preserving
// lane order throughout (FP addition is not associative).
instruct rvadd4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVD src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vaddsd $tmp2,$src1,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vaddsd $tmp2,$tmp2,$tmp\n\t"
            "vextractf128 $tmp3,$src2\n\t"
            "vaddsd $tmp2,$tmp2,$tmp3\n\t"
            "pshufd $tmp,$tmp3,0xE\n\t"
            "vaddsd $dst,$tmp2,$tmp\t! add reduction4D" %}
  ins_encode %{
    __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
2829
// 2-element int multiply reduction, SSE-only path:
//   dst = src1 * src2[0] * src2[1]
// pmulld requires SSE4.1, hence the UseSSE > 3 predicate. pshufd 0x1
// brings lane 1 down so the two lanes can be multiplied in lane 0.
instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseSSE > 3 && UseAVX == 0);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0x1\n\t"
            "pmulld $tmp2,$src2\n\t"
            "movd $tmp,$src1\n\t"
            "pmulld $tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! mul reduction2I" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
2848
// 2-element int multiply reduction, AVX path:
//   dst = src1 * src2[0] * src2[1]
// Folds the two lanes with a shuffle + vpmulld, then multiplies in the
// scalar seed src1. 'false' selects the 128-bit encoding.
instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0x1\n\t"
            "vpmulld $tmp,$src2,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpmulld $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! mul reduction2I" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, false);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
2867
// 4-element int multiply reduction, SSE-only path (pmulld needs SSE4.1):
//   dst = src1 * src2[0] * src2[1] * src2[2] * src2[3]
// Folds high half onto low (pshufd 0xE), then lane 1 onto lane 0
// (pshufd 0x1), then multiplies in the scalar seed.
instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseSSE > 3 && UseAVX == 0);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "pmulld $tmp2,$src2\n\t"
            "pshufd $tmp,$tmp2,0x1\n\t"
            "pmulld $tmp2,$tmp\n\t"
            "movd $tmp,$src1\n\t"
            "pmulld $tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! mul reduction4I" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1);
    __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
2890
// 4-element int multiply reduction, AVX path. Same fold order as the SSE
// variant (high half, then odd lane, then scalar seed), using the
// non-destructive 3-operand vpmulld; 'false' = 128-bit encoding.
instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "vpmulld $tmp,$src2,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpmulld $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpmulld $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! mul reduction4I" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, false);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
2913
// 8-element int multiply reduction over a 256-bit vector, AVX path.
// vextractf128h folds the high 128 bits onto the low half first, then the
// remaining four lanes are reduced as in the 4I variant, and finally the
// scalar seed src1 is multiplied in.
instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextractf128 $tmp,$src2\n\t"
            "vpmulld $tmp,$tmp,$src2\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpmulld $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpmulld $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpmulld $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! mul reduction8I" %}
  ins_encode %{
    __ vextractf128h($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, false);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
2940
// 2-element float multiply reduction, SSE-only path:
//   dst = src1 * src2[0] * src2[1]
// Strictly ordered mulss chain (FP multiply is not associative across
// rounding); pshufd 0x01 brings lane 1 down to lane 0.
// Fix: the disassembly comment in the format string said "add reduction2F"
// although this is a multiply reduction — corrected to "mul reduction2F".
instruct rsmul2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "movdqu $tmp,$src1\n\t"
            "mulss $tmp,$src2\n\t"
            "pshufd $tmp2,$src2,0x01\n\t"
            "mulss $tmp,$tmp2\n\t"
            "movdqu $dst,$tmp\t! mul reduction2F" %}
  ins_encode %{
    __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
    __ mulss($tmp$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
2959
// 2-element float multiply reduction, AVX path:
//   dst = src1 * src2[0] * src2[1]
// Fix: format-string disassembly comment said "add reduction2F" for this
// multiply reduction — corrected to "mul reduction2F".
instruct rvmul2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vmulss $tmp2,$src1,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $dst,$tmp2,$tmp\t! mul reduction2F" %}
  ins_encode %{
    __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
2974
// 4-element float multiply reduction, SSE-only path:
//   dst = src1 * src2[0] * src2[1] * src2[2] * src2[3]
// Lanes are multiplied strictly in order 0..3 (FP multiply is not
// associative across rounding); each pshufd moves the next lane to slot 0.
// Fix: format-string disassembly comment said "add reduction4F" for this
// multiply reduction — corrected to "mul reduction4F".
instruct rsmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "movdqu $tmp,$src1\n\t"
            "mulss $tmp,$src2\n\t"
            "pshufd $tmp2,$src2,0x01\n\t"
            "mulss $tmp,$tmp2\n\t"
            "pshufd $tmp2,$src2,0x02\n\t"
            "mulss $tmp,$tmp2\n\t"
            "pshufd $tmp2,$src2,0x03\n\t"
            "mulss $tmp,$tmp2\n\t"
            "movdqu $dst,$tmp\t! mul reduction4F" %}
  ins_encode %{
    __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
    __ mulss($tmp$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
3001
// 4-element float multiply reduction, AVX path. Lanes multiplied strictly
// in order 0..3 via vmulss, non-destructive 3-operand form.
// Fix: format-string disassembly comment said "add reduction4F" for this
// multiply reduction — corrected to "mul reduction4F".
instruct rvmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vmulss $tmp2,$src1,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vmulss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vmulss $dst,$tmp2,$tmp\t! mul reduction4F" %}
  ins_encode %{
    __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
3024
3025 instruct rvmul8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{
3026 predicate(UseAVX > 0);
3027 match(Set dst (MulReductionVF src1 src2));
3028 effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
3029 format %{ "vmulss $tmp2,$src1,$src2\n\t"
3030 "pshufd $tmp,$src2,0x01\n\t"
3031 "vmulss $tmp2,$tmp2,$tmp\n\t"
3032 "pshufd $tmp,$src2,0x02\n\t"
3044 ins_encode %{
3045 __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
3046 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
3047 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
3048 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
3049 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
3050 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
3051 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
3052 __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
3053 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
3054 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
3055 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
3056 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
3057 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
3058 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
3059 __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
3060 %}
3061 ins_pipe( pipe_slow );
3062 %}
3063
// 2-element double multiply reduction, SSE-only path:
//   dst = src1 * src2[0] * src2[1]
// pshufd 0xE moves the high double into the low position.
// Fix: format-string disassembly comment said "add reduction2D" for this
// multiply reduction — corrected to "mul reduction2D".
instruct rsmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "movdqu $tmp,$src1\n\t"
            "mulsd $tmp,$src2\n\t"
            "pshufd $dst,$src2,0xE\n\t"
            "mulsd $dst,$tmp\t! mul reduction2D" %}
  ins_encode %{
    __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
    __ mulsd($tmp$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
3080
3081 instruct rvmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{
3082 predicate(UseAVX > 0);
3083 match(Set dst (MulReductionVD src1 src2));
3084 effect(TEMP tmp, TEMP tmp2);
3085 format %{ "vmulsd $tmp2,$src1,$src2\n\t"
3086 "pshufd $tmp,$src2,0xE\n\t"
3087 "vmulsd $dst,$tmp2,$tmp\t! mul reduction2D" %}
3088 ins_encode %{
3089 __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
3090 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
3091 __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
3099 effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
3100 format %{ "vmulsd $tmp2,$src1,$src2\n\t"
3101 "pshufd $tmp,$src2,0xE\n\t"
3102 "vmulsd $tmp2,$tmp2,$tmp\n\t"
3103 "vextractf128 $tmp3,$src2\n\t"
3104 "vmulsd $tmp2,$tmp2,$tmp3\n\t"
3105 "pshufd $tmp,$tmp3,0xE\n\t"
3106 "vmulsd $dst,$tmp2,$tmp\t! mul reduction4D" %}
3107 ins_encode %{
3108 __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
3109 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
3110 __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
3111 __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
3112 __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
3113 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
3114 __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
3115 %}
3116 ins_pipe( pipe_slow );
3117 %}
3118
3119 // ====================VECTOR ARITHMETIC=======================================
3120
3121 // --------------------------------- ADD --------------------------------------
3122
3123 // Bytes vector add
// Add four packed bytes, SSE form: dst += src (destructive two-operand).
instruct vadd4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed4B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
3133
// Add four packed bytes, AVX three-operand form: dst = src1 + src2.
instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %}
  ins_encode %{
    // 128-bit encoding; flag inlined rather than held in a local.
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /*vector256=*/false);
  %}
  ins_pipe( pipe_slow );
%}
3144
// Add eight packed bytes, SSE form: dst += src (destructive two-operand).
instruct vadd8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed8B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
3154
// Add eight packed bytes, AVX three-operand form: dst = src1 + src2.
instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %}
  ins_encode %{
    // 128-bit encoding; flag inlined rather than held in a local.
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /*vector256=*/false);
  %}
  ins_pipe( pipe_slow );
%}
3165
// Add sixteen packed bytes, SSE form: dst += src (destructive two-operand).
instruct vadd16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed16B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
3175
// Add sixteen packed bytes, AVX three-operand form: dst = src1 + src2.
instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %}
  ins_encode %{
    // 128-bit encoding; flag inlined rather than held in a local.
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /*vector256=*/false);
  %}
  ins_pipe( pipe_slow );
%}
3186
// Add sixteen packed bytes with a memory operand (AVX): dst = src + [mem].
instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    // 128-bit encoding; flag inlined rather than held in a local.
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /*vector256=*/false);
  %}
  ins_pipe( pipe_slow );
%}
3197
// Add thirty-two packed bytes, 256-bit AVX2 form (UseAVX > 1).
instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
  ins_encode %{
    // 256-bit encoding; flag inlined rather than held in a local.
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /*vector256=*/true);
  %}
  ins_pipe( pipe_slow );
%}
3208
// Add thirty-two packed bytes with a memory operand (AVX2): dst = src + [mem].
instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    // 256-bit encoding; flag inlined rather than held in a local.
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /*vector256=*/true);
  %}
  ins_pipe( pipe_slow );
%}
3219
3220 // Shorts/Chars vector add
// Add two packed shorts/chars, SSE form: dst += src (destructive).
instruct vadd2S(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed2S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
3230
// Add two packed shorts/chars, AVX three-operand form: dst = src1 + src2.
instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
  ins_encode %{
    // 128-bit encoding; flag inlined rather than held in a local.
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /*vector256=*/false);
  %}
  ins_pipe( pipe_slow );
%}
3241
// Add four packed shorts/chars, SSE form: dst += src (destructive).
instruct vadd4S(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed4S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
3251
// Add four packed shorts/chars, AVX three-operand form: dst = src1 + src2.
instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
  ins_encode %{
    // 128-bit encoding; flag inlined rather than held in a local.
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /*vector256=*/false);
  %}
  ins_pipe( pipe_slow );
%}
3262
// Add eight packed shorts/chars, SSE form: dst += src (destructive).
instruct vadd8S(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed8S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
3272
// Add eight packed shorts/chars, AVX three-operand form: dst = src1 + src2.
instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
  ins_encode %{
    // 128-bit encoding; flag inlined rather than held in a local.
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /*vector256=*/false);
  %}
  ins_pipe( pipe_slow );
%}
3283
// Add eight packed shorts/chars with a memory operand (AVX): dst = src + [mem].
instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    // 128-bit encoding; flag inlined rather than held in a local.
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /*vector256=*/false);
  %}
  ins_pipe( pipe_slow );
%}
3294
// Add sixteen packed shorts/chars, 256-bit AVX2 form (UseAVX > 1).
instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    // 256-bit encoding; flag inlined rather than held in a local.
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /*vector256=*/true);
  %}
  ins_pipe( pipe_slow );
%}
3305
// Add sixteen packed shorts/chars with a memory operand (AVX2): dst = src + [mem].
instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    // 256-bit encoding; flag inlined rather than held in a local.
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /*vector256=*/true);
  %}
  ins_pipe( pipe_slow );
%}
3316
3317 // Integers vector add
// Add two packed ints, SSE form: dst += src (destructive two-operand).
instruct vadd2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed2I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
3327
// Add two packed ints, AVX three-operand form: dst = src1 + src2.
instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %}
  ins_encode %{
    // 128-bit encoding; flag inlined rather than held in a local.
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /*vector256=*/false);
  %}
  ins_pipe( pipe_slow );
%}
3338
// Add four packed ints, SSE form: dst += src (destructive two-operand).
instruct vadd4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed4I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
3348
// Add four packed ints, AVX three-operand form: dst = src1 + src2.
instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %}
  ins_encode %{
    // 128-bit encoding; flag inlined rather than held in a local.
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /*vector256=*/false);
  %}
  ins_pipe( pipe_slow );
%}
3359
// Add four packed ints with a memory operand (AVX): dst = src + [mem].
instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %}
  ins_encode %{
    // 128-bit encoding; flag inlined rather than held in a local.
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /*vector256=*/false);
  %}
  ins_pipe( pipe_slow );
%}
3370
// Add eight packed ints, 256-bit AVX2 form (UseAVX > 1).
instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %}
  ins_encode %{
    // 256-bit encoding; flag inlined rather than held in a local.
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /*vector256=*/true);
  %}
  ins_pipe( pipe_slow );
%}
3381
// Add eight packed ints with a memory operand (AVX2): dst = src + [mem].
instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %}
  ins_encode %{
    // 256-bit encoding; flag inlined rather than held in a local.
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /*vector256=*/true);
  %}
  ins_pipe( pipe_slow );
%}
3392
3393 // Longs vector add
// Add two packed longs, SSE form: dst += src (destructive two-operand).
instruct vadd2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL dst src));
  format %{ "paddq $dst,$src\t! add packed2L" %}
  ins_encode %{
    __ paddq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
3403
// Add two packed longs, AVX three-operand form: dst = src1 + src2.
instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %}
  ins_encode %{
    // 128-bit encoding; flag inlined rather than held in a local.
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /*vector256=*/false);
  %}
  ins_pipe( pipe_slow );
%}
3414
// Add two packed longs with a memory operand (AVX): dst = src + [mem].
instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %}
  ins_encode %{
    // 128-bit encoding; flag inlined rather than held in a local.
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /*vector256=*/false);
  %}
  ins_pipe( pipe_slow );
%}
3425
// Add four packed longs, 256-bit AVX2 form (UseAVX > 1).
instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %}
  ins_encode %{
    // 256-bit encoding; flag inlined rather than held in a local.
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /*vector256=*/true);
  %}
  ins_pipe( pipe_slow );
%}
3436
// Add four packed longs with a memory operand (AVX2): dst = src + [mem].
instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %}
  ins_encode %{
    // 256-bit encoding; flag inlined rather than held in a local.
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /*vector256=*/true);
  %}
  ins_pipe( pipe_slow );
%}
3447
3448 // Floats vector add
// Add two packed floats, SSE form: dst += src (destructive two-operand).
instruct vadd2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed2F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
3458
// Add two packed floats, AVX three-operand form: dst = src1 + src2.
instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %}
  ins_encode %{
    // 128-bit encoding; flag inlined rather than held in a local.
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /*vector256=*/false);
  %}
  ins_pipe( pipe_slow );
%}
3469
// Add four packed floats, SSE form: dst += src (destructive two-operand).
instruct vadd4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed4F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
3479
// Add four packed floats, AVX three-operand form: dst = src1 + src2.
instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %}
  ins_encode %{
    // 128-bit encoding; flag inlined rather than held in a local.
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /*vector256=*/false);
  %}
  ins_pipe( pipe_slow );
%}
3490
// Add four packed floats with a memory operand (AVX): dst = src + [mem].
instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed4F" %}
  ins_encode %{
    // 128-bit encoding; flag inlined rather than held in a local.
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /*vector256=*/false);
  %}
  ins_pipe( pipe_slow );
%}
3501
// Add eight packed floats, 256-bit AVX form (FP 256-bit needs only AVX1).
instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %}
  ins_encode %{
    // 256-bit encoding; flag inlined rather than held in a local.
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, /*vector256=*/true);
  %}
  ins_pipe( pipe_slow );
%}
3512
// Add eight packed floats with a memory operand (AVX): dst = src + [mem].
instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed8F" %}
  ins_encode %{
    // 256-bit encoding; flag inlined rather than held in a local.
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, /*vector256=*/true);
  %}
  ins_pipe( pipe_slow );
%}
3523
3524 // Doubles vector add
// Add two packed doubles, SSE form: dst += src (destructive two-operand).
instruct vadd2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD dst src));
  format %{ "addpd $dst,$src\t! add packed2D" %}
  ins_encode %{
    __ addpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
3534
3535 instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
3536 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3537 match(Set dst (AddVD src1 src2));
3538 format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %}
3539 ins_encode %{
3540 bool vector256 = false;
3541 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3542 %}
3543 ins_pipe( pipe_slow );
3544 %}
3545
3546 instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
3547 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3548 match(Set dst (AddVD src (LoadVector mem)));
3549 format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %}
3550 ins_encode %{
3551 bool vector256 = false;
3552 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3553 %}
3554 ins_pipe( pipe_slow );
3555 %}
3556
3557 instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
3558 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3559 match(Set dst (AddVD src1 src2));
3560 format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %}
3561 ins_encode %{
3562 bool vector256 = true;
3563 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3564 %}
3565 ins_pipe( pipe_slow );
3566 %}
3567
3568 instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
3569 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3570 match(Set dst (AddVD src (LoadVector mem)));
3571 format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %}
3572 ins_encode %{
3573 bool vector256 = true;
3574 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3575 %}
3576 ins_pipe( pipe_slow );
3577 %}
3578
3579 // --------------------------------- SUB --------------------------------------
3580
3581 // Bytes vector sub
// Plain forms (vsub4B/8B/16B) are destructive SSE2 psubb (dst -= src).
// _reg/_mem forms are the non-destructive AVX encodings.  Note the 256-bit
// 32-byte forms require UseAVX > 1: 256-bit *integer* ops need AVX2,
// whereas AVX1 only provides 256-bit floating-point ops.
3582 instruct vsub4B(vecS dst, vecS src) %{
3583   predicate(n->as_Vector()->length() == 4);
3584   match(Set dst (SubVB dst src));
3585   format %{ "psubb   $dst,$src\t! sub packed4B" %}
3586   ins_encode %{
3587     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
3588   %}
3589   ins_pipe( pipe_slow );
3590 %}
3591
// 4 bytes, AVX register x register.
3592 instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{
3593   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3594   match(Set dst (SubVB src1 src2));
3595   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed4B" %}
3596   ins_encode %{
3597     bool vector256 = false;
3598     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3599   %}
3600   ins_pipe( pipe_slow );
3601 %}
3602
// 8 bytes, destructive SSE2 form.
3603 instruct vsub8B(vecD dst, vecD src) %{
3604   predicate(n->as_Vector()->length() == 8);
3605   match(Set dst (SubVB dst src));
3606   format %{ "psubb   $dst,$src\t! sub packed8B" %}
3607   ins_encode %{
3608     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
3609   %}
3610   ins_pipe( pipe_slow );
3611 %}
3612
// 8 bytes, AVX register x register.
3613 instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
3614   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3615   match(Set dst (SubVB src1 src2));
3616   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed8B" %}
3617   ins_encode %{
3618     bool vector256 = false;
3619     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3620   %}
3621   ins_pipe( pipe_slow );
3622 %}
3623
// 16 bytes, destructive SSE2 form.
3624 instruct vsub16B(vecX dst, vecX src) %{
3625   predicate(n->as_Vector()->length() == 16);
3626   match(Set dst (SubVB dst src));
3627   format %{ "psubb   $dst,$src\t! sub packed16B" %}
3628   ins_encode %{
3629     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
3630   %}
3631   ins_pipe( pipe_slow );
3632 %}
3633
// 16 bytes, AVX register x register.
3634 instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
3635   predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
3636   match(Set dst (SubVB src1 src2));
3637   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed16B" %}
3638   ins_encode %{
3639     bool vector256 = false;
3640     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3641   %}
3642   ins_pipe( pipe_slow );
3643 %}
3644
// 16 bytes, second operand folded from memory.
3645 instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{
3646   predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
3647   match(Set dst (SubVB src (LoadVector mem)));
3648   format %{ "vpsubb  $dst,$src,$mem\t! sub packed16B" %}
3649   ins_encode %{
3650     bool vector256 = false;
3651     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3652   %}
3653   ins_pipe( pipe_slow );
3654 %}
3655
// 32 bytes: 256-bit integer op, requires AVX2 (UseAVX > 1).
3656 instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{
3657   predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
3658   match(Set dst (SubVB src1 src2));
3659   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed32B" %}
3660   ins_encode %{
3661     bool vector256 = true;
3662     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3663   %}
3664   ins_pipe( pipe_slow );
3665 %}
3666
// 32 bytes, memory form, AVX2 only.
3667 instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{
3668   predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
3669   match(Set dst (SubVB src (LoadVector mem)));
3670   format %{ "vpsubb  $dst,$src,$mem\t! sub packed32B" %}
3671   ins_encode %{
3672     bool vector256 = true;
3673     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3674   %}
3675   ins_pipe( pipe_slow );
3676 %}
3677
3678 // Shorts/Chars vector sub
// Same pattern as the byte forms: destructive SSE2 psubw, non-destructive
// AVX vpsubw, and AVX2-gated (UseAVX > 1) 256-bit 16-element forms.
3679 instruct vsub2S(vecS dst, vecS src) %{
3680   predicate(n->as_Vector()->length() == 2);
3681   match(Set dst (SubVS dst src));
3682   format %{ "psubw   $dst,$src\t! sub packed2S" %}
3683   ins_encode %{
3684     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
3685   %}
3686   ins_pipe( pipe_slow );
3687 %}
3688
// 2 shorts, AVX register x register.
3689 instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{
3690   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3691   match(Set dst (SubVS src1 src2));
3692   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed2S" %}
3693   ins_encode %{
3694     bool vector256 = false;
3695     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3696   %}
3697   ins_pipe( pipe_slow );
3698 %}
3699
// 4 shorts, destructive SSE2 form.
3700 instruct vsub4S(vecD dst, vecD src) %{
3701   predicate(n->as_Vector()->length() == 4);
3702   match(Set dst (SubVS dst src));
3703   format %{ "psubw   $dst,$src\t! sub packed4S" %}
3704   ins_encode %{
3705     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
3706   %}
3707   ins_pipe( pipe_slow );
3708 %}
3709
// 4 shorts, AVX register x register.
3710 instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
3711   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3712   match(Set dst (SubVS src1 src2));
3713   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed4S" %}
3714   ins_encode %{
3715     bool vector256 = false;
3716     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3717   %}
3718   ins_pipe( pipe_slow );
3719 %}
3720
// 8 shorts, destructive SSE2 form.
3721 instruct vsub8S(vecX dst, vecX src) %{
3722   predicate(n->as_Vector()->length() == 8);
3723   match(Set dst (SubVS dst src));
3724   format %{ "psubw   $dst,$src\t! sub packed8S" %}
3725   ins_encode %{
3726     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
3727   %}
3728   ins_pipe( pipe_slow );
3729 %}
3730
// 8 shorts, AVX register x register.
3731 instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
3732   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3733   match(Set dst (SubVS src1 src2));
3734   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed8S" %}
3735   ins_encode %{
3736     bool vector256 = false;
3737     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3738   %}
3739   ins_pipe( pipe_slow );
3740 %}
3741
// 8 shorts, second operand folded from memory.
3742 instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{
3743   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3744   match(Set dst (SubVS src (LoadVector mem)));
3745   format %{ "vpsubw  $dst,$src,$mem\t! sub packed8S" %}
3746   ins_encode %{
3747     bool vector256 = false;
3748     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3749   %}
3750   ins_pipe( pipe_slow );
3751 %}
3752
// 16 shorts: 256-bit integer op, requires AVX2.
3753 instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{
3754   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
3755   match(Set dst (SubVS src1 src2));
3756   format %{ "vpsubw  $dst,$src1,$src2\t! sub packed16S" %}
3757   ins_encode %{
3758     bool vector256 = true;
3759     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3760   %}
3761   ins_pipe( pipe_slow );
3762 %}
3763
// 16 shorts, memory form, AVX2 only.
3764 instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{
3765   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
3766   match(Set dst (SubVS src (LoadVector mem)));
3767   format %{ "vpsubw  $dst,$src,$mem\t! sub packed16S" %}
3768   ins_encode %{
3769     bool vector256 = true;
3770     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3771   %}
3772   ins_pipe( pipe_slow );
3773 %}
3774
3775 // Integers vector sub
// Destructive SSE2 psubd, non-destructive AVX vpsubd; 8-element 256-bit
// forms are gated on AVX2 (UseAVX > 1).
3776 instruct vsub2I(vecD dst, vecD src) %{
3777   predicate(n->as_Vector()->length() == 2);
3778   match(Set dst (SubVI dst src));
3779   format %{ "psubd   $dst,$src\t! sub packed2I" %}
3780   ins_encode %{
3781     __ psubd($dst$$XMMRegister, $src$$XMMRegister);
3782   %}
3783   ins_pipe( pipe_slow );
3784 %}
3785
// 2 ints, AVX register x register.
3786 instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
3787   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3788   match(Set dst (SubVI src1 src2));
3789   format %{ "vpsubd  $dst,$src1,$src2\t! sub packed2I" %}
3790   ins_encode %{
3791     bool vector256 = false;
3792     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3793   %}
3794   ins_pipe( pipe_slow );
3795 %}
3796
// 4 ints, destructive SSE2 form.
3797 instruct vsub4I(vecX dst, vecX src) %{
3798   predicate(n->as_Vector()->length() == 4);
3799   match(Set dst (SubVI dst src));
3800   format %{ "psubd   $dst,$src\t! sub packed4I" %}
3801   ins_encode %{
3802     __ psubd($dst$$XMMRegister, $src$$XMMRegister);
3803   %}
3804   ins_pipe( pipe_slow );
3805 %}
3806
// 4 ints, AVX register x register.
3807 instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
3808   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3809   match(Set dst (SubVI src1 src2));
3810   format %{ "vpsubd  $dst,$src1,$src2\t! sub packed4I" %}
3811   ins_encode %{
3812     bool vector256 = false;
3813     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3814   %}
3815   ins_pipe( pipe_slow );
3816 %}
3817
// 4 ints, second operand folded from memory.
3818 instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
3819   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3820   match(Set dst (SubVI src (LoadVector mem)));
3821   format %{ "vpsubd  $dst,$src,$mem\t! sub packed4I" %}
3822   ins_encode %{
3823     bool vector256 = false;
3824     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3825   %}
3826   ins_pipe( pipe_slow );
3827 %}
3828
// 8 ints: 256-bit integer op, requires AVX2.
3829 instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
3830   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
3831   match(Set dst (SubVI src1 src2));
3832   format %{ "vpsubd  $dst,$src1,$src2\t! sub packed8I" %}
3833   ins_encode %{
3834     bool vector256 = true;
3835     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3836   %}
3837   ins_pipe( pipe_slow );
3838 %}
3839
// 8 ints, memory form, AVX2 only.
3840 instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
3841   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
3842   match(Set dst (SubVI src (LoadVector mem)));
3843   format %{ "vpsubd  $dst,$src,$mem\t! sub packed8I" %}
3844   ins_encode %{
3845     bool vector256 = true;
3846     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3847   %}
3848   ins_pipe( pipe_slow );
3849 %}
3850
3851 // Longs vector sub
// Destructive SSE2 psubq, non-destructive AVX vpsubq; 4-element 256-bit
// forms require AVX2 (UseAVX > 1).
3852 instruct vsub2L(vecX dst, vecX src) %{
3853   predicate(n->as_Vector()->length() == 2);
3854   match(Set dst (SubVL dst src));
3855   format %{ "psubq   $dst,$src\t! sub packed2L" %}
3856   ins_encode %{
3857     __ psubq($dst$$XMMRegister, $src$$XMMRegister);
3858   %}
3859   ins_pipe( pipe_slow );
3860 %}
3861
// 2 longs, AVX register x register.
3862 instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
3863   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3864   match(Set dst (SubVL src1 src2));
3865   format %{ "vpsubq  $dst,$src1,$src2\t! sub packed2L" %}
3866   ins_encode %{
3867     bool vector256 = false;
3868     __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3869   %}
3870   ins_pipe( pipe_slow );
3871 %}
3872
// 2 longs, second operand folded from memory.
3873 instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
3874   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3875   match(Set dst (SubVL src (LoadVector mem)));
3876   format %{ "vpsubq  $dst,$src,$mem\t! sub packed2L" %}
3877   ins_encode %{
3878     bool vector256 = false;
3879     __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3880   %}
3881   ins_pipe( pipe_slow );
3882 %}
3883
// 4 longs: 256-bit integer op, requires AVX2.
3884 instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
3885   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
3886   match(Set dst (SubVL src1 src2));
3887   format %{ "vpsubq  $dst,$src1,$src2\t! sub packed4L" %}
3888   ins_encode %{
3889     bool vector256 = true;
3890     __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3891   %}
3892   ins_pipe( pipe_slow );
3893 %}
3894
// 4 longs, memory form, AVX2 only.
3895 instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
3896   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
3897   match(Set dst (SubVL src (LoadVector mem)));
3898   format %{ "vpsubq  $dst,$src,$mem\t! sub packed4L" %}
3899   ins_encode %{
3900     bool vector256 = true;
3901     __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3902   %}
3903   ins_pipe( pipe_slow );
3904 %}
3905
3906 // Floats vector sub
// Destructive SSE subps, non-destructive AVX vsubps.  Because these are
// floating-point ops, the 8-element 256-bit forms only need AVX1
// (UseAVX > 0), unlike the integer forms which need AVX2.
3907 instruct vsub2F(vecD dst, vecD src) %{
3908   predicate(n->as_Vector()->length() == 2);
3909   match(Set dst (SubVF dst src));
3910   format %{ "subps   $dst,$src\t! sub packed2F" %}
3911   ins_encode %{
3912     __ subps($dst$$XMMRegister, $src$$XMMRegister);
3913   %}
3914   ins_pipe( pipe_slow );
3915 %}
3916
// 2 floats, AVX register x register.
3917 instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
3918   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3919   match(Set dst (SubVF src1 src2));
3920   format %{ "vsubps  $dst,$src1,$src2\t! sub packed2F" %}
3921   ins_encode %{
3922     bool vector256 = false;
3923     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3924   %}
3925   ins_pipe( pipe_slow );
3926 %}
3927
// 4 floats, destructive SSE form.
3928 instruct vsub4F(vecX dst, vecX src) %{
3929   predicate(n->as_Vector()->length() == 4);
3930   match(Set dst (SubVF dst src));
3931   format %{ "subps   $dst,$src\t! sub packed4F" %}
3932   ins_encode %{
3933     __ subps($dst$$XMMRegister, $src$$XMMRegister);
3934   %}
3935   ins_pipe( pipe_slow );
3936 %}
3937
// 4 floats, AVX register x register.
3938 instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
3939   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3940   match(Set dst (SubVF src1 src2));
3941   format %{ "vsubps  $dst,$src1,$src2\t! sub packed4F" %}
3942   ins_encode %{
3943     bool vector256 = false;
3944     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3945   %}
3946   ins_pipe( pipe_slow );
3947 %}
3948
// 4 floats, second operand folded from memory.
3949 instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
3950   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3951   match(Set dst (SubVF src (LoadVector mem)));
3952   format %{ "vsubps  $dst,$src,$mem\t! sub packed4F" %}
3953   ins_encode %{
3954     bool vector256 = false;
3955     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3956   %}
3957   ins_pipe( pipe_slow );
3958 %}
3959
// 8 floats, 256-bit register x register (AVX1 suffices for FP).
3960 instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{
3961   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3962   match(Set dst (SubVF src1 src2));
3963   format %{ "vsubps  $dst,$src1,$src2\t! sub packed8F" %}
3964   ins_encode %{
3965     bool vector256 = true;
3966     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3967   %}
3968   ins_pipe( pipe_slow );
3969 %}
3970
// 8 floats, memory form.
3971 instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{
3972   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3973   match(Set dst (SubVF src (LoadVector mem)));
3974   format %{ "vsubps  $dst,$src,$mem\t! sub packed8F" %}
3975   ins_encode %{
3976     bool vector256 = true;
3977     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3978   %}
3979   ins_pipe( pipe_slow );
3980 %}
3981
3982 // Doubles vector sub
// Destructive SSE2 subpd, non-destructive AVX vsubpd; 4-element 256-bit
// FP forms need only AVX1 (UseAVX > 0).
3983 instruct vsub2D(vecX dst, vecX src) %{
3984   predicate(n->as_Vector()->length() == 2);
3985   match(Set dst (SubVD dst src));
3986   format %{ "subpd   $dst,$src\t! sub packed2D" %}
3987   ins_encode %{
3988     __ subpd($dst$$XMMRegister, $src$$XMMRegister);
3989   %}
3990   ins_pipe( pipe_slow );
3991 %}
3992
// 2 doubles, AVX register x register.
3993 instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
3994   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3995   match(Set dst (SubVD src1 src2));
3996   format %{ "vsubpd  $dst,$src1,$src2\t! sub packed2D" %}
3997   ins_encode %{
3998     bool vector256 = false;
3999     __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4000   %}
4001   ins_pipe( pipe_slow );
4002 %}
4003
// 2 doubles, second operand folded from memory.
4004 instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
4005   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4006   match(Set dst (SubVD src (LoadVector mem)));
4007   format %{ "vsubpd  $dst,$src,$mem\t! sub packed2D" %}
4008   ins_encode %{
4009     bool vector256 = false;
4010     __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4011   %}
4012   ins_pipe( pipe_slow );
4013 %}
4014
// 4 doubles, 256-bit register x register.
4015 instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
4016   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4017   match(Set dst (SubVD src1 src2));
4018   format %{ "vsubpd  $dst,$src1,$src2\t! sub packed4D" %}
4019   ins_encode %{
4020     bool vector256 = true;
4021     __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4022   %}
4023   ins_pipe( pipe_slow );
4024 %}
4025
// 4 doubles, memory form.
4026 instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
4027   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4028   match(Set dst (SubVD src (LoadVector mem)));
4029   format %{ "vsubpd  $dst,$src,$mem\t! sub packed4D" %}
4030   ins_encode %{
4031     bool vector256 = true;
4032     __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4033   %}
4034   ins_pipe( pipe_slow );
4035 %}
4036
4037 // --------------------------------- MUL --------------------------------------
4038
4039 // Shorts/Chars vector mul
// pmullw/vpmullw: low 16 bits of each 16x16 product.  Destructive SSE2
// plain forms; AVX _reg/_mem forms; 16-element 256-bit forms need AVX2.
4040 instruct vmul2S(vecS dst, vecS src) %{
4041   predicate(n->as_Vector()->length() == 2);
4042   match(Set dst (MulVS dst src));
4043   format %{ "pmullw $dst,$src\t! mul packed2S" %}
4044   ins_encode %{
4045     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
4046   %}
4047   ins_pipe( pipe_slow );
4048 %}
4049
// 2 shorts, AVX register x register.
4050 instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{
4051   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4052   match(Set dst (MulVS src1 src2));
4053   format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
4054   ins_encode %{
4055     bool vector256 = false;
4056     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4057   %}
4058   ins_pipe( pipe_slow );
4059 %}
4060
// 4 shorts, destructive SSE2 form.
4061 instruct vmul4S(vecD dst, vecD src) %{
4062   predicate(n->as_Vector()->length() == 4);
4063   match(Set dst (MulVS dst src));
4064   format %{ "pmullw  $dst,$src\t! mul packed4S" %}
4065   ins_encode %{
4066     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
4067   %}
4068   ins_pipe( pipe_slow );
4069 %}
4070
// 4 shorts, AVX register x register.
4071 instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{
4072   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4073   match(Set dst (MulVS src1 src2));
4074   format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
4075   ins_encode %{
4076     bool vector256 = false;
4077     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4078   %}
4079   ins_pipe( pipe_slow );
4080 %}
4081
// 8 shorts, destructive SSE2 form.
4082 instruct vmul8S(vecX dst, vecX src) %{
4083   predicate(n->as_Vector()->length() == 8);
4084   match(Set dst (MulVS dst src));
4085   format %{ "pmullw  $dst,$src\t! mul packed8S" %}
4086   ins_encode %{
4087     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
4088   %}
4089   ins_pipe( pipe_slow );
4090 %}
4091
// 8 shorts, AVX register x register.
4092 instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{
4093   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
4094   match(Set dst (MulVS src1 src2));
4095   format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
4096   ins_encode %{
4097     bool vector256 = false;
4098     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4099   %}
4100   ins_pipe( pipe_slow );
4101 %}
4102
// 8 shorts, second operand folded from memory.
4103 instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{
4104   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
4105   match(Set dst (MulVS src (LoadVector mem)));
4106   format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
4107   ins_encode %{
4108     bool vector256 = false;
4109     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4110   %}
4111   ins_pipe( pipe_slow );
4112 %}
4113
// 16 shorts: 256-bit integer op, requires AVX2.
4114 instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{
4115   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
4116   match(Set dst (MulVS src1 src2));
4117   format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
4118   ins_encode %{
4119     bool vector256 = true;
4120     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4121   %}
4122   ins_pipe( pipe_slow );
4123 %}
4124
// 16 shorts, memory form, AVX2 only.
4125 instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{
4126   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
4127   match(Set dst (MulVS src (LoadVector mem)));
4128   format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
4129   ins_encode %{
4130     bool vector256 = true;
4131     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4132   %}
4133   ins_pipe( pipe_slow );
4134 %}
4135
4136 // Integers vector mul (sse4_1)
// pmulld (32x32 -> low 32) was introduced in SSE4.1, hence the stricter
// UseSSE > 3 predicate on the non-AVX forms.  8-element 256-bit forms
// require AVX2 (UseAVX > 1).
4137 instruct vmul2I(vecD dst, vecD src) %{
4138   predicate(UseSSE > 3 && n->as_Vector()->length() == 2);
4139   match(Set dst (MulVI dst src));
4140   format %{ "pmulld  $dst,$src\t! mul packed2I" %}
4141   ins_encode %{
4142     __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
4143   %}
4144   ins_pipe( pipe_slow );
4145 %}
4146
// 2 ints, AVX register x register.
4147 instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
4148   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4149   match(Set dst (MulVI src1 src2));
4150   format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %}
4151   ins_encode %{
4152     bool vector256 = false;
4153     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4154   %}
4155   ins_pipe( pipe_slow );
4156 %}
4157
// 4 ints, destructive SSE4.1 form.
4158 instruct vmul4I(vecX dst, vecX src) %{
4159   predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
4160   match(Set dst (MulVI dst src));
4161   format %{ "pmulld  $dst,$src\t! mul packed4I" %}
4162   ins_encode %{
4163     __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
4164   %}
4165   ins_pipe( pipe_slow );
4166 %}
4167
// 4 ints, AVX register x register.
4168 instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
4169   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4170   match(Set dst (MulVI src1 src2));
4171   format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %}
4172   ins_encode %{
4173     bool vector256 = false;
4174     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4175   %}
4176   ins_pipe( pipe_slow );
4177 %}
4178
// 4 ints, second operand folded from memory.
4179 instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{
4180   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4181   match(Set dst (MulVI src (LoadVector mem)));
4182   format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %}
4183   ins_encode %{
4184     bool vector256 = false;
4185     __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4186   %}
4187   ins_pipe( pipe_slow );
4188 %}
4189
// 8 ints: 256-bit integer op, requires AVX2.
4190 instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{
4191   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
4192   match(Set dst (MulVI src1 src2));
4193   format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %}
4194   ins_encode %{
4195     bool vector256 = true;
4196     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4197   %}
4198   ins_pipe( pipe_slow );
4199 %}
4200
// 8 ints, memory form, AVX2 only.
4201 instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{
4202   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
4203   match(Set dst (MulVI src (LoadVector mem)));
4204   format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %}
4205   ins_encode %{
4206     bool vector256 = true;
4207     __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4208   %}
4209   ins_pipe( pipe_slow );
4210 %}
4211
4212 // Floats vector mul
// Destructive SSE mulps, non-destructive AVX vmulps; 8-element 256-bit FP
// forms need only AVX1 (UseAVX > 0).
4213 instruct vmul2F(vecD dst, vecD src) %{
4214   predicate(n->as_Vector()->length() == 2);
4215   match(Set dst (MulVF dst src));
4216   format %{ "mulps   $dst,$src\t! mul packed2F" %}
4217   ins_encode %{
4218     __ mulps($dst$$XMMRegister, $src$$XMMRegister);
4219   %}
4220   ins_pipe( pipe_slow );
4221 %}
4222
// 2 floats, AVX register x register.
4223 instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
4224   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4225   match(Set dst (MulVF src1 src2));
4226   format %{ "vmulps  $dst,$src1,$src2\t! mul packed2F" %}
4227   ins_encode %{
4228     bool vector256 = false;
4229     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4230   %}
4231   ins_pipe( pipe_slow );
4232 %}
4233
// 4 floats, destructive SSE form.
4234 instruct vmul4F(vecX dst, vecX src) %{
4235   predicate(n->as_Vector()->length() == 4);
4236   match(Set dst (MulVF dst src));
4237   format %{ "mulps   $dst,$src\t! mul packed4F" %}
4238   ins_encode %{
4239     __ mulps($dst$$XMMRegister, $src$$XMMRegister);
4240   %}
4241   ins_pipe( pipe_slow );
4242 %}
4243
// 4 floats, AVX register x register.
4244 instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
4245   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4246   match(Set dst (MulVF src1 src2));
4247   format %{ "vmulps  $dst,$src1,$src2\t! mul packed4F" %}
4248   ins_encode %{
4249     bool vector256 = false;
4250     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4251   %}
4252   ins_pipe( pipe_slow );
4253 %}
4254
// 4 floats, second operand folded from memory.
4255 instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{
4256   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4257   match(Set dst (MulVF src (LoadVector mem)));
4258   format %{ "vmulps  $dst,$src,$mem\t! mul packed4F" %}
4259   ins_encode %{
4260     bool vector256 = false;
4261     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4262   %}
4263   ins_pipe( pipe_slow );
4264 %}
4265
// 8 floats, 256-bit register x register.
4266 instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{
4267   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
4268   match(Set dst (MulVF src1 src2));
4269   format %{ "vmulps  $dst,$src1,$src2\t! mul packed8F" %}
4270   ins_encode %{
4271     bool vector256 = true;
4272     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4273   %}
4274   ins_pipe( pipe_slow );
4275 %}
4276
// 8 floats, memory form.
4277 instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{
4278   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
4279   match(Set dst (MulVF src (LoadVector mem)));
4280   format %{ "vmulps  $dst,$src,$mem\t! mul packed8F" %}
4281   ins_encode %{
4282     bool vector256 = true;
4283     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4284   %}
4285   ins_pipe( pipe_slow );
4286 %}
4287
4288 // Doubles vector mul
// Destructive SSE2 mulpd, non-destructive AVX vmulpd; 4-element 256-bit
// FP forms need only AVX1.
4289 instruct vmul2D(vecX dst, vecX src) %{
4290   predicate(n->as_Vector()->length() == 2);
4291   match(Set dst (MulVD dst src));
4292   format %{ "mulpd   $dst,$src\t! mul packed2D" %}
4293   ins_encode %{
4294     __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
4295   %}
4296   ins_pipe( pipe_slow );
4297 %}
4298
// 2 doubles, AVX register x register.
4299 instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
4300   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4301   match(Set dst (MulVD src1 src2));
4302   format %{ "vmulpd  $dst,$src1,$src2\t! mul packed2D" %}
4303   ins_encode %{
4304     bool vector256 = false;
4305     __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4306   %}
4307   ins_pipe( pipe_slow );
4308 %}
4309
// 2 doubles, second operand folded from memory.
4310 instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
4311   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4312   match(Set dst (MulVD src (LoadVector mem)));
4313   format %{ "vmulpd  $dst,$src,$mem\t! mul packed2D" %}
4314   ins_encode %{
4315     bool vector256 = false;
4316     __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4317   %}
4318   ins_pipe( pipe_slow );
4319 %}
4320
// 4 doubles, 256-bit register x register.
4321 instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
4322   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4323   match(Set dst (MulVD src1 src2));
4324   format %{ "vmulpd  $dst,$src1,$src2\t! mul packed4D" %}
4325   ins_encode %{
4326     bool vector256 = true;
4327     __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4328   %}
4329   ins_pipe( pipe_slow );
4330 %}
4331
// 4 doubles, memory form.
4332 instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
4333   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4334   match(Set dst (MulVD src (LoadVector mem)));
4335   format %{ "vmulpd  $dst,$src,$mem\t! mul packed4D" %}
4336   ins_encode %{
4337     bool vector256 = true;
4338     __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4339   %}
4340   ins_pipe( pipe_slow );
4341 %}
4342
4343 // --------------------------------- DIV --------------------------------------
4344
4345 // Floats vector div
// Division exists only for FP vectors (no packed integer divide on x86).
// Destructive SSE divps, non-destructive AVX vdivps; 8-element 256-bit
// forms need only AVX1 (UseAVX > 0).
4346 instruct vdiv2F(vecD dst, vecD src) %{
4347   predicate(n->as_Vector()->length() == 2);
4348   match(Set dst (DivVF dst src));
4349   format %{ "divps   $dst,$src\t! div packed2F" %}
4350   ins_encode %{
4351     __ divps($dst$$XMMRegister, $src$$XMMRegister);
4352   %}
4353   ins_pipe( pipe_slow );
4354 %}
4355
// 2 floats, AVX register x register.
4356 instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
4357   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4358   match(Set dst (DivVF src1 src2));
4359   format %{ "vdivps  $dst,$src1,$src2\t! div packed2F" %}
4360   ins_encode %{
4361     bool vector256 = false;
4362     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4363   %}
4364   ins_pipe( pipe_slow );
4365 %}
4366
// 4 floats, destructive SSE form.
4367 instruct vdiv4F(vecX dst, vecX src) %{
4368   predicate(n->as_Vector()->length() == 4);
4369   match(Set dst (DivVF dst src));
4370   format %{ "divps   $dst,$src\t! div packed4F" %}
4371   ins_encode %{
4372     __ divps($dst$$XMMRegister, $src$$XMMRegister);
4373   %}
4374   ins_pipe( pipe_slow );
4375 %}
4376
// 4 floats, AVX register x register.
4377 instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
4378   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4379   match(Set dst (DivVF src1 src2));
4380   format %{ "vdivps  $dst,$src1,$src2\t! div packed4F" %}
4381   ins_encode %{
4382     bool vector256 = false;
4383     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4384   %}
4385   ins_pipe( pipe_slow );
4386 %}
4387
// 4 floats, divisor folded from memory.
4388 instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
4389   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4390   match(Set dst (DivVF src (LoadVector mem)));
4391   format %{ "vdivps  $dst,$src,$mem\t! div packed4F" %}
4392   ins_encode %{
4393     bool vector256 = false;
4394     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4395   %}
4396   ins_pipe( pipe_slow );
4397 %}
4398
// 8 floats, 256-bit register x register.
4399 instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
4400   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
4401   match(Set dst (DivVF src1 src2));
4402   format %{ "vdivps  $dst,$src1,$src2\t! div packed8F" %}
4403   ins_encode %{
4404     bool vector256 = true;
4405     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4406   %}
4407   ins_pipe( pipe_slow );
4408 %}
4409
// 8 floats, memory form.
4410 instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
4411   predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
4412   match(Set dst (DivVF src (LoadVector mem)));
4413   format %{ "vdivps  $dst,$src,$mem\t! div packed8F" %}
4414   ins_encode %{
4415     bool vector256 = true;
4416     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4417   %}
4418   ins_pipe( pipe_slow );
4419 %}
4420
4421 // Doubles vector div
// Destructive SSE2 divpd, non-destructive AVX vdivpd; 4-element 256-bit
// FP forms need only AVX1.
4422 instruct vdiv2D(vecX dst, vecX src) %{
4423   predicate(n->as_Vector()->length() == 2);
4424   match(Set dst (DivVD dst src));
4425   format %{ "divpd   $dst,$src\t! div packed2D" %}
4426   ins_encode %{
4427     __ divpd($dst$$XMMRegister, $src$$XMMRegister);
4428   %}
4429   ins_pipe( pipe_slow );
4430 %}
4431
// 2 doubles, AVX register x register.
4432 instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
4433   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4434   match(Set dst (DivVD src1 src2));
4435   format %{ "vdivpd  $dst,$src1,$src2\t! div packed2D" %}
4436   ins_encode %{
4437     bool vector256 = false;
4438     __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4439   %}
4440   ins_pipe( pipe_slow );
4441 %}
4442
// 2 doubles, divisor folded from memory.
4443 instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
4444   predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4445   match(Set dst (DivVD src (LoadVector mem)));
4446   format %{ "vdivpd  $dst,$src,$mem\t! div packed2D" %}
4447   ins_encode %{
4448     bool vector256 = false;
4449     __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4450   %}
4451   ins_pipe( pipe_slow );
4452 %}
4453
// 4 doubles, 256-bit register x register.
4454 instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
4455   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4456   match(Set dst (DivVD src1 src2));
4457   format %{ "vdivpd  $dst,$src1,$src2\t! div packed4D" %}
4458   ins_encode %{
4459     bool vector256 = true;
4460     __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
4461   %}
4462   ins_pipe( pipe_slow );
4463 %}
4464
// 4 doubles, memory form.
4465 instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
4466   predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4467   match(Set dst (DivVD src (LoadVector mem)));
4468   format %{ "vdivpd  $dst,$src,$mem\t! div packed4D" %}
4469   ins_encode %{
4470     bool vector256 = true;
4471     __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
4472   %}
4473   ins_pipe( pipe_slow );
4474 %}
4475
// ------------------------------ Shift ---------------------------------------

// Left and right shift count vectors are the same on x86
// (only lowest bits of xmm reg are used for count).
//
// Materializes a variable shift count from a general-purpose register into
// the low 32 bits of an XMM register (movdl), so the packed-shift
// instructions below can consume it. Matches both the left- and
// right-shift count nodes since the encoding is identical.
instruct vshiftcnt(vecS dst, rRegI cnt) %{
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "movd $dst,$cnt\t! load shift count" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $cnt$$Register);
  %}
  ins_pipe( pipe_slow );
%}
4489
4490 // ------------------------------ LeftShift -----------------------------------
4491
4498 __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
4499 %}
4500 ins_pipe( pipe_slow );
4501 %}
4502
// Shorts/Chars vector left shift (packed 16-bit lanes, psllw/vpsllw).
// Pattern scheme used throughout the shift families:
//   vsllNS       - SSE form, variable count in XMM, destructive (dst also src)
//   vsllNS_imm   - SSE form, 8-bit immediate count, destructive
//   vsllNS_reg   - AVX three-operand form, variable count (UseAVX > 0)
//   vsllNS_reg_imm - AVX three-operand form, immediate count
// 16-lane (256-bit) integer forms require UseAVX > 1 (AVX2).

// SSE packed2S left shift by immediate; dst = dst << shift.
instruct vsll2S_imm(vecS dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// AVX packed2S left shift, variable count in XMM.
instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    bool vector256 = false;  // 128-bit (XMM) VEX encoding
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX packed2S left shift by immediate.
instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// SSE packed4S left shift, variable count.
instruct vsll4S(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE packed4S left shift by immediate.
instruct vsll4S_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// AVX packed4S left shift, variable count.
instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX packed4S left shift by immediate.
instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// SSE packed8S left shift, variable count.
instruct vsll8S(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE packed8S left shift by immediate.
instruct vsll8S_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// AVX packed8S left shift, variable count.
instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX packed8S left shift by immediate.
instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2 packed16S (256-bit) left shift, variable count; UseAVX > 1 required.
instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    bool vector256 = true;  // 256-bit (YMM) VEX encoding
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2 packed16S left shift by immediate.
instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}
4640
// Integers vector left shift (packed 32-bit lanes, pslld/vpslld).
// Same variant scheme as the shorts family: SSE destructive forms,
// immediate forms, and AVX three-operand forms; 8-lane 256-bit forms
// require UseAVX > 1 (AVX2).

// SSE packed2I left shift, variable count; dst = dst << shift.
instruct vsll2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE packed2I left shift by immediate.
instruct vsll2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// AVX packed2I left shift, variable count.
instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    bool vector256 = false;  // 128-bit (XMM) VEX encoding
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX packed2I left shift by immediate.
instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// SSE packed4I left shift, variable count.
instruct vsll4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE packed4I left shift by immediate.
instruct vsll4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// AVX packed4I left shift, variable count.
instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX packed4I left shift by immediate.
instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2 packed8I (256-bit) left shift, variable count; UseAVX > 1 required.
instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    bool vector256 = true;  // 256-bit (YMM) VEX encoding
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2 packed8I left shift by immediate.
instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}
4747
// Longs vector left shift (packed 64-bit lanes, psllq/vpsllq).
// Same variant scheme as above; the 4-lane 256-bit forms need AVX2.

// SSE packed2L left shift, variable count; dst = dst << shift.
instruct vsll2L(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE packed2L left shift by immediate.
instruct vsll2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// AVX packed2L left shift, variable count.
instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    bool vector256 = false;  // 128-bit (XMM) VEX encoding
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX packed2L left shift by immediate.
instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2 packed4L (256-bit) left shift, variable count; UseAVX > 1 required.
instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    bool vector256 = true;  // 256-bit (YMM) VEX encoding
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2 packed4L left shift by immediate.
instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}
4812
4813 // ----------------------- LogicalRightShift -----------------------------------
4814
// Shorts vector logical right shift produces an incorrect Java result
// for negative data because Java code converts a short value into an int
// with sign extension before a shift. But char vectors are fine since
// chars are unsigned values.
4819
// Shorts/Chars vector logical right shift (packed 16-bit lanes,
// psrlw/vpsrlw). See the note above: only valid for unsigned (char) data.
// Variant scheme matches the left-shift families: SSE destructive,
// immediate, and AVX three-operand forms; 16-lane forms need AVX2.

// SSE packed2S logical right shift, variable count; dst = dst >>> shift.
instruct vsrl2S(vecS dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE packed2S logical right shift by immediate.
instruct vsrl2S_imm(vecS dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// AVX packed2S logical right shift, variable count.
instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    bool vector256 = false;  // 128-bit (XMM) VEX encoding
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX packed2S logical right shift by immediate.
instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// SSE packed4S logical right shift, variable count.
instruct vsrl4S(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE packed4S logical right shift by immediate.
instruct vsrl4S_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// AVX packed4S logical right shift, variable count.
instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX packed4S logical right shift by immediate.
instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// SSE packed8S logical right shift, variable count.
instruct vsrl8S(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE packed8S logical right shift by immediate.
instruct vsrl8S_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// AVX packed8S logical right shift, variable count.
instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX packed8S logical right shift by immediate.
instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2 packed16S (256-bit) logical right shift, variable count.
instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    bool vector256 = true;  // 256-bit (YMM) VEX encoding
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2 packed16S logical right shift by immediate.
instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}
4967
// Integers vector logical right shift (packed 32-bit lanes, psrld/vpsrld).
// Same variant scheme as above; 8-lane 256-bit forms require AVX2.

// SSE packed2I logical right shift, variable count; dst = dst >>> shift.
instruct vsrl2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE packed2I logical right shift by immediate.
instruct vsrl2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// AVX packed2I logical right shift, variable count.
instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    bool vector256 = false;  // 128-bit (XMM) VEX encoding
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX packed2I logical right shift by immediate.
instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// SSE packed4I logical right shift, variable count.
instruct vsrl4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE packed4I logical right shift by immediate.
instruct vsrl4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// AVX packed4I logical right shift, variable count.
instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX packed4I logical right shift by immediate.
instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2 packed8I (256-bit) logical right shift, variable count.
instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    bool vector256 = true;  // 256-bit (YMM) VEX encoding
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2 packed8I logical right shift by immediate.
instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}
5074
// Longs vector logical right shift (packed 64-bit lanes, psrlq/vpsrlq).
// Same variant scheme; the 4-lane 256-bit forms need AVX2.

// SSE packed2L logical right shift, variable count; dst = dst >>> shift.
instruct vsrl2L(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE packed2L logical right shift by immediate.
instruct vsrl2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// AVX packed2L logical right shift, variable count.
instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    bool vector256 = false;  // 128-bit (XMM) VEX encoding
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX packed2L logical right shift by immediate.
instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2 packed4L (256-bit) logical right shift, variable count.
instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    bool vector256 = true;  // 256-bit (YMM) VEX encoding
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2 packed4L logical right shift by immediate.
instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}
5139
// ------------------- ArithmeticRightShift -----------------------------------

// Shorts/Chars vector arithmetic right shift (packed 16-bit lanes,
// psraw/vpsraw). Sign-extending shift, so correct for signed short data.
// Same variant scheme as the other shift families; 16-lane forms need AVX2.

// SSE packed2S arithmetic right shift, variable count; dst = dst >> shift.
instruct vsra2S(vecS dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE packed2S arithmetic right shift by immediate.
instruct vsra2S_imm(vecS dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// AVX packed2S arithmetic right shift, variable count.
instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    bool vector256 = false;  // 128-bit (XMM) VEX encoding
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX packed2S arithmetic right shift by immediate.
instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// SSE packed4S arithmetic right shift, variable count.
instruct vsra4S(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE packed4S arithmetic right shift by immediate.
instruct vsra4S_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// AVX packed4S arithmetic right shift, variable count.
instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX packed4S arithmetic right shift by immediate.
instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// SSE packed8S arithmetic right shift, variable count.
instruct vsra8S(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE packed8S arithmetic right shift by immediate.
instruct vsra8S_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// AVX packed8S arithmetic right shift, variable count.
instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX packed8S arithmetic right shift by immediate.
instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2 packed16S (256-bit) arithmetic right shift, variable count.
instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    bool vector256 = true;  // 256-bit (YMM) VEX encoding
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2 packed16S arithmetic right shift by immediate.
instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}
5290
// Integers vector arithmetic right shift (packed 32-bit lanes,
// psrad/vpsrad). Sign-extending shift; same variant scheme as above.
// (Note: there is no psraq, so no packed-long arithmetic shift patterns.)

// SSE packed2I arithmetic right shift, variable count; dst = dst >> shift.
instruct vsra2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE packed2I arithmetic right shift by immediate.
instruct vsra2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// AVX packed2I arithmetic right shift, variable count.
instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    bool vector256 = false;  // 128-bit (XMM) VEX encoding
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX packed2I arithmetic right shift by immediate.
instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// SSE packed4I arithmetic right shift, variable count.
instruct vsra4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE packed4I arithmetic right shift by immediate.
instruct vsra4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// AVX packed4I arithmetic right shift, variable count.
instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// AVX packed4I arithmetic right shift by immediate.
instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}
5375
5376 instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{
5377 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
5378 match(Set dst (RShiftVI src shift));
5379 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
5380 ins_encode %{
5381 bool vector256 = true;
5382 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
5383 %}
5384 ins_pipe( pipe_slow );
5385 %}
5386
5387 instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
5388 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
5389 match(Set dst (RShiftVI src shift));
5390 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
5391 ins_encode %{
5392 bool vector256 = true;
5393 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
5394 %}
5395 ins_pipe( pipe_slow );
5396 %}
5397
5398 // There are no vector arithmetic right shift instructions for longs.
5399
5400
5401 // --------------------------------- AND --------------------------------------
5402
// Bitwise AND of vectors. Rules select on length_in_bytes so one rule covers
// every element type of that size. SSE pand forms are read-modify-write on dst;
// AVX vpand forms are three-operand, with _mem variants folding the load.
5403 instruct vand4B(vecS dst, vecS src) %{
5404   predicate(n->as_Vector()->length_in_bytes() == 4);
5405   match(Set dst (AndV dst src));
5406   format %{ "pand    $dst,$src\t! and vectors (4 bytes)" %}
5407   ins_encode %{
5408     __ pand($dst$$XMMRegister, $src$$XMMRegister);
5409   %}
5410   ins_pipe( pipe_slow );
5411 %}
5412
5413 instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
5414   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
5415   match(Set dst (AndV src1 src2));
5416   format %{ "vpand   $dst,$src1,$src2\t! and vectors (4 bytes)" %}
5417   ins_encode %{
5418     bool vector256 = false;
5419     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
5420   %}
5421   ins_pipe( pipe_slow );
5422 %}
5423
5424 instruct vand8B(vecD dst, vecD src) %{
5425   predicate(n->as_Vector()->length_in_bytes() == 8);
5426   match(Set dst (AndV dst src));
5427   format %{ "pand    $dst,$src\t! and vectors (8 bytes)" %}
5428   ins_encode %{
5429     __ pand($dst$$XMMRegister, $src$$XMMRegister);
5430   %}
5431   ins_pipe( pipe_slow );
5432 %}
5433
5434 instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
5435   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
5436   match(Set dst (AndV src1 src2));
5437   format %{ "vpand   $dst,$src1,$src2\t! and vectors (8 bytes)" %}
5438   ins_encode %{
5439     bool vector256 = false;
5440     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
5441   %}
5442   ins_pipe( pipe_slow );
5443 %}
5444
5445 instruct vand16B(vecX dst, vecX src) %{
5446   predicate(n->as_Vector()->length_in_bytes() == 16);
5447   match(Set dst (AndV dst src));
5448   format %{ "pand    $dst,$src\t! and vectors (16 bytes)" %}
5449   ins_encode %{
5450     __ pand($dst$$XMMRegister, $src$$XMMRegister);
5451   %}
5452   ins_pipe( pipe_slow );
5453 %}
5454
5455 instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
5456   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
5457   match(Set dst (AndV src1 src2));
5458   format %{ "vpand   $dst,$src1,$src2\t! and vectors (16 bytes)" %}
5459   ins_encode %{
5460     bool vector256 = false;
5461     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
5462   %}
5463   ins_pipe( pipe_slow );
5464 %}
5465
// Memory-operand variant: folds the LoadVector into the AND.
5466 instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
5467   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
5468   match(Set dst (AndV src (LoadVector mem)));
5469   format %{ "vpand   $dst,$src,$mem\t! and vectors (16 bytes)" %}
5470   ins_encode %{
5471     bool vector256 = false;
5472     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
5473   %}
5474   ins_pipe( pipe_slow );
5475 %}
5476
// 256-bit forms require AVX2 (UseAVX > 1).
5477 instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
5478   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
5479   match(Set dst (AndV src1 src2));
5480   format %{ "vpand   $dst,$src1,$src2\t! and vectors (32 bytes)" %}
5481   ins_encode %{
5482     bool vector256 = true;
5483     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
5484   %}
5485   ins_pipe( pipe_slow );
5486 %}
5487
5488 instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
5489   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
5490   match(Set dst (AndV src (LoadVector mem)));
5491   format %{ "vpand   $dst,$src,$mem\t! and vectors (32 bytes)" %}
5492   ins_encode %{
5493     bool vector256 = true;
5494     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
5495   %}
5496   ins_pipe( pipe_slow );
5497 %}
5498
5499 // --------------------------------- OR ---------------------------------------
5500
// Bitwise OR of vectors; identical structure to the AND section above
// (SSE por read-modify-write, AVX vpor three-operand, _mem folds the load).
5501 instruct vor4B(vecS dst, vecS src) %{
5502   predicate(n->as_Vector()->length_in_bytes() == 4);
5503   match(Set dst (OrV dst src));
5504   format %{ "por     $dst,$src\t! or vectors (4 bytes)" %}
5505   ins_encode %{
5506     __ por($dst$$XMMRegister, $src$$XMMRegister);
5507   %}
5508   ins_pipe( pipe_slow );
5509 %}
5510
5511 instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
5512   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
5513   match(Set dst (OrV src1 src2));
5514   format %{ "vpor    $dst,$src1,$src2\t! or vectors (4 bytes)" %}
5515   ins_encode %{
5516     bool vector256 = false;
5517     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
5518   %}
5519   ins_pipe( pipe_slow );
5520 %}
5521
5522 instruct vor8B(vecD dst, vecD src) %{
5523   predicate(n->as_Vector()->length_in_bytes() == 8);
5524   match(Set dst (OrV dst src));
5525   format %{ "por     $dst,$src\t! or vectors (8 bytes)" %}
5526   ins_encode %{
5527     __ por($dst$$XMMRegister, $src$$XMMRegister);
5528   %}
5529   ins_pipe( pipe_slow );
5530 %}
5531
5532 instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
5533   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
5534   match(Set dst (OrV src1 src2));
5535   format %{ "vpor    $dst,$src1,$src2\t! or vectors (8 bytes)" %}
5536   ins_encode %{
5537     bool vector256 = false;
5538     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
5539   %}
5540   ins_pipe( pipe_slow );
5541 %}
5542
5543 instruct vor16B(vecX dst, vecX src) %{
5544   predicate(n->as_Vector()->length_in_bytes() == 16);
5545   match(Set dst (OrV dst src));
5546   format %{ "por     $dst,$src\t! or vectors (16 bytes)" %}
5547   ins_encode %{
5548     __ por($dst$$XMMRegister, $src$$XMMRegister);
5549   %}
5550   ins_pipe( pipe_slow );
5551 %}
5552
5553 instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
5554   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
5555   match(Set dst (OrV src1 src2));
5556   format %{ "vpor    $dst,$src1,$src2\t! or vectors (16 bytes)" %}
5557   ins_encode %{
5558     bool vector256 = false;
5559     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
5560   %}
5561   ins_pipe( pipe_slow );
5562 %}
5563
// Memory-operand variant: folds the LoadVector into the OR.
5564 instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
5565   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
5566   match(Set dst (OrV src (LoadVector mem)));
5567   format %{ "vpor    $dst,$src,$mem\t! or vectors (16 bytes)" %}
5568   ins_encode %{
5569     bool vector256 = false;
5570     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
5571   %}
5572   ins_pipe( pipe_slow );
5573 %}
5574
// 256-bit forms require AVX2 (UseAVX > 1).
5575 instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
5576   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
5577   match(Set dst (OrV src1 src2));
5578   format %{ "vpor    $dst,$src1,$src2\t! or vectors (32 bytes)" %}
5579   ins_encode %{
5580     bool vector256 = true;
5581     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
5582   %}
5583   ins_pipe( pipe_slow );
5584 %}
5585
5586 instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
5587   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
5588   match(Set dst (OrV src (LoadVector mem)));
5589   format %{ "vpor    $dst,$src,$mem\t! or vectors (32 bytes)" %}
5590   ins_encode %{
5591     bool vector256 = true;
5592     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
5593   %}
5594   ins_pipe( pipe_slow );
5595 %}
5596
5597 // --------------------------------- XOR --------------------------------------
5598
// Bitwise XOR of vectors; identical structure to the AND/OR sections above
// (SSE pxor read-modify-write, AVX vpxor three-operand, _mem folds the load).
5599 instruct vxor4B(vecS dst, vecS src) %{
5600   predicate(n->as_Vector()->length_in_bytes() == 4);
5601   match(Set dst (XorV dst src));
5602   format %{ "pxor    $dst,$src\t! xor vectors (4 bytes)" %}
5603   ins_encode %{
5604     __ pxor($dst$$XMMRegister, $src$$XMMRegister);
5605   %}
5606   ins_pipe( pipe_slow );
5607 %}
5608
5609 instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
5610   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
5611   match(Set dst (XorV src1 src2));
5612   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
5613   ins_encode %{
5614     bool vector256 = false;
5615     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
5616   %}
5617   ins_pipe( pipe_slow );
5618 %}
5619
5620 instruct vxor8B(vecD dst, vecD src) %{
5621   predicate(n->as_Vector()->length_in_bytes() == 8);
5622   match(Set dst (XorV dst src));
5623   format %{ "pxor    $dst,$src\t! xor vectors (8 bytes)" %}
5624   ins_encode %{
5625     __ pxor($dst$$XMMRegister, $src$$XMMRegister);
5626   %}
5627   ins_pipe( pipe_slow );
5628 %}
5629
5630 instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
5631   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
5632   match(Set dst (XorV src1 src2));
5633   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
5634   ins_encode %{
5635     bool vector256 = false;
5636     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
5637   %}
5638   ins_pipe( pipe_slow );
5639 %}
5640
5641 instruct vxor16B(vecX dst, vecX src) %{
5642   predicate(n->as_Vector()->length_in_bytes() == 16);
5643   match(Set dst (XorV dst src));
5644   format %{ "pxor    $dst,$src\t! xor vectors (16 bytes)" %}
5645   ins_encode %{
5646     __ pxor($dst$$XMMRegister, $src$$XMMRegister);
5647   %}
5648   ins_pipe( pipe_slow );
5649 %}
5650
5651 instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
5652   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
5653   match(Set dst (XorV src1 src2));
5654   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
5655   ins_encode %{
5656     bool vector256 = false;
5657     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
5658   %}
5659   ins_pipe( pipe_slow );
5660 %}
5661
// Memory-operand variant: folds the LoadVector into the XOR.
5662 instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
5663   predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
5664   match(Set dst (XorV src (LoadVector mem)));
5665   format %{ "vpxor   $dst,$src,$mem\t! xor vectors (16 bytes)" %}
5666   ins_encode %{
5667     bool vector256 = false;
5668     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
5669   %}
5670   ins_pipe( pipe_slow );
5671 %}
5672
// 256-bit forms require AVX2 (UseAVX > 1).
5673 instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
5674   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
5675   match(Set dst (XorV src1 src2));
5676   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
5677   ins_encode %{
5678     bool vector256 = true;
5679     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
5680   %}
5681   ins_pipe( pipe_slow );
5682 %}
5683
5684 instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
5685   predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
5686   match(Set dst (XorV src (LoadVector mem)));
5687   format %{ "vpxor   $dst,$src,$mem\t! xor vectors (32 bytes)" %}
5688   ins_encode %{
5689     bool vector256 = true;
5690     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
5691   %}
5692   ins_pipe( pipe_slow );
5693 %}
5694
|
42 //
43 // SOC = Save-On-Call: The register allocator assumes that these registers
44 // can be used without saving upon entry to the method,
45 // but that they must be saved at call sites.
46 //
47 // SOE = Save-On-Entry: The register allocator assumes that these registers
48 // must be saved before using them upon entry to the
49 // method, but they do not need to be saved at call
50 // sites.
51 //
52 // AS = Always-Save: The register allocator assumes that these registers
53 // must be saved before using them upon entry to the
54 // method, & that they must be saved at call sites.
55 //
56 // Ideal Register Type is used to determine how to save & restore a
57 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
58 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
59 //
60 // The encoding number is the actual bit-pattern placed into the opcodes.
61
62 // XMM registers.  512-bit registers or 16 words each, labeled (a)-p.
63 // Word a in each register holds a Float, words ab hold a Double.
64 // The whole registers are used in SSE4.2 version intrinsics,
65 // array copy stubs and superword operations (see UseSSE42Intrinsics,
66 // UseXMMForArrayCopy and UseSuperword flags).
67 // For pre EVEX enabled architectures:
68 // XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
69 // For EVEX enabled architectures:
70 // XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
71 //
72 // Linux ABI: No register preserved across function calls
73 // XMM0-XMM7 might hold parameters
74 // Windows ABI: XMM6-XMM31 preserved across function calls
75 // XMM0-XMM3 might hold parameters
76
// XMM0-XMM5: caller-saved (SOC at calls) in both ABIs per the header comment
// above. Each register is described as sixteen 32-bit slices (a)-(p) via
// successive VMReg next(k) offsets so the allocator can track partial use.
77 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
78 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
79 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
80 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
81 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
82 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
83 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
84 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
85 reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
86 reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
87 reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
88 reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
89 reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
90 reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
91 reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
92 reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));
93
94 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
95 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
96 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
97 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
98 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
99 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
100 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
101 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
102 reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
103 reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
104 reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
105 reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
106 reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
107 reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
108 reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
109 reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));
110
111 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
112 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
113 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
114 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
115 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
116 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
117 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
118 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
119 reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
120 reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
121 reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
122 reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
123 reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
124 reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
125 reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
126 reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));
127
128 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
129 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
130 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
131 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
132 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
133 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
134 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
135 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
136 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
137 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
138 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
139 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
140 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
141 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
142 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
143 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));
144
145 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
146 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
147 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
148 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
149 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
150 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
151 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
152 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
153 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
154 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
155 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
156 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
157 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
158 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
159 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
160 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));
161
162 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
163 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
164 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
165 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
166 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
167 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
168 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
169 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
170 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
171 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
172 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
173 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
174 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
175 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
176 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
177 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));
178
// Windows x64 variant: XMM6-XMM15 are save-on-entry (SOE) here, matching the
// "Windows ABI" note in the header comment above; the non-Windows definitions
// live in the #else branch (outside this section).
179 #ifdef _WIN64
180
181 reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
182 reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(1));
183 reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(2));
184 reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(3));
185 reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(4));
186 reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(5));
187 reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(6));
188 reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(7));
189 reg_def XMM6i( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(8));
190 reg_def XMM6j( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(9));
191 reg_def XMM6k( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(10));
192 reg_def XMM6l( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(11));
193 reg_def XMM6m( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(12));
194 reg_def XMM6n( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(13));
195 reg_def XMM6o( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(14));
196 reg_def XMM6p( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(15));
197
198 reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
199 reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(1));
200 reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(2));
201 reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(3));
202 reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(4));
203 reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(5));
204 reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(6));
205 reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(7));
206 reg_def XMM7i( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(8));
207 reg_def XMM7j( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(9));
208 reg_def XMM7k( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(10));
209 reg_def XMM7l( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(11));
210 reg_def XMM7m( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(12));
211 reg_def XMM7n( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(13));
212 reg_def XMM7o( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(14));
213 reg_def XMM7p( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(15));
214
215 reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
216 reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(1));
217 reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(2));
218 reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(3));
219 reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(4));
220 reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(5));
221 reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(6));
222 reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(7));
223 reg_def XMM8i( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(8));
224 reg_def XMM8j( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(9));
225 reg_def XMM8k( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(10));
226 reg_def XMM8l( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(11));
227 reg_def XMM8m( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(12));
228 reg_def XMM8n( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(13));
229 reg_def XMM8o( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(14));
230 reg_def XMM8p( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(15));
231
232 reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
233 reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(1));
234 reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(2));
235 reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(3));
236 reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(4));
237 reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(5));
238 reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(6));
239 reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(7));
240 reg_def XMM9i( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(8));
241 reg_def XMM9j( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(9));
242 reg_def XMM9k( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(10));
243 reg_def XMM9l( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(11));
244 reg_def XMM9m( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(12));
245 reg_def XMM9n( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(13));
246 reg_def XMM9o( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(14));
247 reg_def XMM9p( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(15));
248
249 reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
250 reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(1));
251 reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(2));
252 reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(3));
253 reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(4));
254 reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(5));
255 reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(6));
256 reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(7));
257 reg_def XMM10i( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(8));
258 reg_def XMM10j( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(9));
259 reg_def XMM10k( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(10));
260 reg_def XMM10l( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(11));
261 reg_def XMM10m( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(12));
262 reg_def XMM10n( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(13));
263 reg_def XMM10o( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(14));
264 reg_def XMM10p( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(15));
265
266 reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
267 reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(1));
268 reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(2));
269 reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(3));
270 reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(4));
271 reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(5));
272 reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(6));
273 reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(7));
274 reg_def XMM11i( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(8));
275 reg_def XMM11j( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(9));
276 reg_def XMM11k( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(10));
277 reg_def XMM11l( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(11));
278 reg_def XMM11m( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(12));
279 reg_def XMM11n( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(13));
280 reg_def XMM11o( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(14));
281 reg_def XMM11p( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(15));
282
283 reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
284 reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(1));
285 reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(2));
286 reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(3));
287 reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(4));
288 reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(5));
289 reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(6));
290 reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(7));
291 reg_def XMM12i( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(8));
292 reg_def XMM12j( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(9));
293 reg_def XMM12k( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(10));
294 reg_def XMM12l( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(11));
295 reg_def XMM12m( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(12));
296 reg_def XMM12n( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(13));
297 reg_def XMM12o( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(14));
298 reg_def XMM12p( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(15));
299
300 reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
301 reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(1));
302 reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(2));
303 reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(3));
304 reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(4));
305 reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(5));
306 reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(6));
307 reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(7));
308 reg_def XMM13i( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(8));
309 reg_def XMM13j( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(9));
310 reg_def XMM13k( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(10));
311 reg_def XMM13l( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(11));
312 reg_def XMM13m( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(12));
313 reg_def XMM13n( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(13));
314 reg_def XMM13o( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(14));
315 reg_def XMM13p( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(15));
316
317 reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
318 reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(1));
319 reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(2));
320 reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(3));
321 reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(4));
322 reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(5));
323 reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(6));
324 reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(7));
325 reg_def XMM14i( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(8));
326 reg_def XMM14j( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(9));
327 reg_def XMM14k( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(10));
328 reg_def XMM14l( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(11));
329 reg_def XMM14m( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(12));
330 reg_def XMM14n( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(13));
331 reg_def XMM14o( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(14));
332 reg_def XMM14p( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(15));
333
334 reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
335 reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(1));
336 reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(2));
337 reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(3));
338 reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(4));
339 reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(5));
340 reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(6));
341 reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(7));
342 reg_def XMM15i( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(8));
343 reg_def XMM15j( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(9));
344 reg_def XMM15k( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(10));
345 reg_def XMM15l( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(11));
346 reg_def XMM15m( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(12));
347 reg_def XMM15n( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(13));
348 reg_def XMM15o( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(14));
349 reg_def XMM15p( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(15));
350
// XMM16 slots (a)-(p), one reg_def per 32-bit slot of the 512-bit register.
// Fix: XMM16i previously mapped to xmm15->as_VMReg()->next(8) (copy/paste
// error from the XMM15 group); every slot of XMM16 must map into xmm16's
// VMReg sequence, matching the pattern of all other register groups.
351 reg_def XMM16 ( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg());
352 reg_def XMM16b( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(1));
353 reg_def XMM16c( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(2));
354 reg_def XMM16d( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(3));
355 reg_def XMM16e( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(4));
356 reg_def XMM16f( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(5));
357 reg_def XMM16g( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(6));
358 reg_def XMM16h( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(7));
359 reg_def XMM16i( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(8));
360 reg_def XMM16j( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(9));
361 reg_def XMM16k( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(10));
362 reg_def XMM16l( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(11));
363 reg_def XMM16m( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(12));
364 reg_def XMM16n( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(13));
365 reg_def XMM16o( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(14));
366 reg_def XMM16p( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(15));
367
// XMM17-XMM26: uniform groups of sixteen 32-bit slots, (a)-(p), one per
// register; slot k maps to xmmN->as_VMReg()->next(k). SOC at call sites,
// SOE on method entry in this branch (presumably the _WIN64 side of the
// #else below -- confirm against the enclosing #ifdef).
368 reg_def XMM17 ( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg());
369 reg_def XMM17b( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(1));
370 reg_def XMM17c( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(2));
371 reg_def XMM17d( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(3));
372 reg_def XMM17e( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(4));
373 reg_def XMM17f( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(5));
374 reg_def XMM17g( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(6));
375 reg_def XMM17h( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(7));
376 reg_def XMM17i( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(8));
377 reg_def XMM17j( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(9));
378 reg_def XMM17k( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(10));
379 reg_def XMM17l( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(11));
380 reg_def XMM17m( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(12));
381 reg_def XMM17n( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(13));
382 reg_def XMM17o( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(14));
383 reg_def XMM17p( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(15));
384 
385 reg_def XMM18 ( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg());
386 reg_def XMM18b( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(1));
387 reg_def XMM18c( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(2));
388 reg_def XMM18d( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(3));
389 reg_def XMM18e( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(4));
390 reg_def XMM18f( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(5));
391 reg_def XMM18g( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(6));
392 reg_def XMM18h( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(7));
393 reg_def XMM18i( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(8));
394 reg_def XMM18j( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(9));
395 reg_def XMM18k( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(10));
396 reg_def XMM18l( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(11));
397 reg_def XMM18m( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(12));
398 reg_def XMM18n( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(13));
399 reg_def XMM18o( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(14));
400 reg_def XMM18p( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(15));
401 
402 reg_def XMM19 ( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg());
403 reg_def XMM19b( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(1));
404 reg_def XMM19c( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(2));
405 reg_def XMM19d( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(3));
406 reg_def XMM19e( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(4));
407 reg_def XMM19f( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(5));
408 reg_def XMM19g( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(6));
409 reg_def XMM19h( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(7));
410 reg_def XMM19i( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(8));
411 reg_def XMM19j( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(9));
412 reg_def XMM19k( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(10));
413 reg_def XMM19l( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(11));
414 reg_def XMM19m( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(12));
415 reg_def XMM19n( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(13));
416 reg_def XMM19o( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(14));
417 reg_def XMM19p( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(15));
418 
419 reg_def XMM20 ( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg());
420 reg_def XMM20b( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(1));
421 reg_def XMM20c( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(2));
422 reg_def XMM20d( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(3));
423 reg_def XMM20e( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(4));
424 reg_def XMM20f( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(5));
425 reg_def XMM20g( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(6));
426 reg_def XMM20h( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(7));
427 reg_def XMM20i( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(8));
428 reg_def XMM20j( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(9));
429 reg_def XMM20k( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(10));
430 reg_def XMM20l( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(11));
431 reg_def XMM20m( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(12));
432 reg_def XMM20n( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(13));
433 reg_def XMM20o( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(14));
434 reg_def XMM20p( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(15));
435 
436 reg_def XMM21 ( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg());
437 reg_def XMM21b( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(1));
438 reg_def XMM21c( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(2));
439 reg_def XMM21d( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(3));
440 reg_def XMM21e( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(4));
441 reg_def XMM21f( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(5));
442 reg_def XMM21g( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(6));
443 reg_def XMM21h( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(7));
444 reg_def XMM21i( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(8));
445 reg_def XMM21j( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(9));
446 reg_def XMM21k( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(10));
447 reg_def XMM21l( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(11));
448 reg_def XMM21m( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(12));
449 reg_def XMM21n( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(13));
450 reg_def XMM21o( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(14));
451 reg_def XMM21p( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(15));
452 
453 reg_def XMM22 ( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg());
454 reg_def XMM22b( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(1));
455 reg_def XMM22c( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(2));
456 reg_def XMM22d( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(3));
457 reg_def XMM22e( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(4));
458 reg_def XMM22f( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(5));
459 reg_def XMM22g( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(6));
460 reg_def XMM22h( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(7));
461 reg_def XMM22i( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(8));
462 reg_def XMM22j( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(9));
463 reg_def XMM22k( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(10));
464 reg_def XMM22l( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(11));
465 reg_def XMM22m( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(12));
466 reg_def XMM22n( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(13));
467 reg_def XMM22o( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(14));
468 reg_def XMM22p( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(15));
469 
470 reg_def XMM23 ( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg());
471 reg_def XMM23b( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(1));
472 reg_def XMM23c( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(2));
473 reg_def XMM23d( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(3));
474 reg_def XMM23e( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(4));
475 reg_def XMM23f( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(5));
476 reg_def XMM23g( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(6));
477 reg_def XMM23h( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(7));
478 reg_def XMM23i( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(8));
479 reg_def XMM23j( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(9));
480 reg_def XMM23k( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(10));
481 reg_def XMM23l( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(11));
482 reg_def XMM23m( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(12));
483 reg_def XMM23n( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(13));
484 reg_def XMM23o( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(14));
485 reg_def XMM23p( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(15));
486 
487 reg_def XMM24 ( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg());
488 reg_def XMM24b( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(1));
489 reg_def XMM24c( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(2));
490 reg_def XMM24d( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(3));
491 reg_def XMM24e( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(4));
492 reg_def XMM24f( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(5));
493 reg_def XMM24g( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(6));
494 reg_def XMM24h( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(7));
495 reg_def XMM24i( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(8));
496 reg_def XMM24j( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(9));
497 reg_def XMM24k( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(10));
498 reg_def XMM24l( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(11));
499 reg_def XMM24m( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(12));
500 reg_def XMM24n( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(13));
501 reg_def XMM24o( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(14));
502 reg_def XMM24p( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(15));
503 
504 reg_def XMM25 ( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg());
505 reg_def XMM25b( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(1));
506 reg_def XMM25c( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(2));
507 reg_def XMM25d( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(3));
508 reg_def XMM25e( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(4));
509 reg_def XMM25f( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(5));
510 reg_def XMM25g( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(6));
511 reg_def XMM25h( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(7));
512 reg_def XMM25i( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(8));
513 reg_def XMM25j( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(9));
514 reg_def XMM25k( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(10));
515 reg_def XMM25l( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(11));
516 reg_def XMM25m( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(12));
517 reg_def XMM25n( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(13));
518 reg_def XMM25o( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(14));
519 reg_def XMM25p( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(15));
520 
521 reg_def XMM26 ( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg());
522 reg_def XMM26b( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(1));
523 reg_def XMM26c( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(2));
524 reg_def XMM26d( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(3));
525 reg_def XMM26e( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(4));
526 reg_def XMM26f( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(5));
527 reg_def XMM26g( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(6));
528 reg_def XMM26h( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(7));
529 reg_def XMM26i( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(8));
530 reg_def XMM26j( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(9));
531 reg_def XMM26k( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(10));
532 reg_def XMM26l( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(11));
533 reg_def XMM26m( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(12));
534 reg_def XMM26n( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(13));
535 reg_def XMM26o( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(14));
536 reg_def XMM26p( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(15));
537
// XMM27 slots (a)-(p).
// Fix: the base XMM27 and XMM27b definitions were missing, and the first
// line declared a duplicate name XMM27g (also defined for next(6) below)
// mapped to next(1). Restored the standard sixteen-slot sequence used by
// every other register group in this table.
538 reg_def XMM27 ( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg());
538 reg_def XMM27b( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(1));
539 reg_def XMM27c( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(2));
540 reg_def XMM27d( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(3));
541 reg_def XMM27e( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(4));
542 reg_def XMM27f( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(5));
543 reg_def XMM27g( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(6));
544 reg_def XMM27h( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(7));
545 reg_def XMM27i( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(8));
546 reg_def XMM27j( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(9));
547 reg_def XMM27k( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(10));
548 reg_def XMM27l( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(11));
549 reg_def XMM27m( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(12));
550 reg_def XMM27n( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(13));
551 reg_def XMM27o( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(14));
552 reg_def XMM27p( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(15));
553
// XMM28-XMM30: sixteen 32-bit slots per register, (a)-(p); slot k maps to
// xmmN->as_VMReg()->next(k). SOC at call sites, SOE on method entry in
// this branch.
554 reg_def XMM28 ( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg());
555 reg_def XMM28b( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(1));
556 reg_def XMM28c( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(2));
557 reg_def XMM28d( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(3));
558 reg_def XMM28e( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(4));
559 reg_def XMM28f( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(5));
560 reg_def XMM28g( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(6));
561 reg_def XMM28h( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(7));
562 reg_def XMM28i( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(8));
563 reg_def XMM28j( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(9));
564 reg_def XMM28k( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(10));
565 reg_def XMM28l( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(11));
566 reg_def XMM28m( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(12));
567 reg_def XMM28n( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(13));
568 reg_def XMM28o( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(14));
569 reg_def XMM28p( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(15));
570 
571 reg_def XMM29 ( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg());
572 reg_def XMM29b( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(1));
573 reg_def XMM29c( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(2));
574 reg_def XMM29d( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(3));
575 reg_def XMM29e( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(4));
576 reg_def XMM29f( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(5));
577 reg_def XMM29g( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(6));
578 reg_def XMM29h( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(7));
579 reg_def XMM29i( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(8));
580 reg_def XMM29j( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(9));
581 reg_def XMM29k( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(10));
582 reg_def XMM29l( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(11));
583 reg_def XMM29m( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(12));
584 reg_def XMM29n( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(13));
585 reg_def XMM29o( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(14));
586 reg_def XMM29p( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(15));
587 
588 reg_def XMM30 ( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg());
589 reg_def XMM30b( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(1));
590 reg_def XMM30c( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(2));
591 reg_def XMM30d( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(3));
592 reg_def XMM30e( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(4));
593 reg_def XMM30f( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(5));
594 reg_def XMM30g( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(6));
595 reg_def XMM30h( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(7));
596 reg_def XMM30i( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(8));
597 reg_def XMM30j( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(9));
598 reg_def XMM30k( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(10));
599 reg_def XMM30l( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(11));
600 reg_def XMM30m( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(12));
601 reg_def XMM30n( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(13));
602 reg_def XMM30o( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(14));
603 reg_def XMM30p( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(15));
604
// XMM31 slots (a)-(p), one reg_def per 32-bit slot of the 512-bit register.
// Fix: XMM31h had the arrow operator transposed ("xmm31>-as_VMReg()"),
// which does not parse; corrected to "xmm31->as_VMReg()".
605 reg_def XMM31 ( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg());
606 reg_def XMM31b( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(1));
607 reg_def XMM31c( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(2));
608 reg_def XMM31d( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(3));
609 reg_def XMM31e( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(4));
610 reg_def XMM31f( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(5));
611 reg_def XMM31g( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(6));
612 reg_def XMM31h( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(7));
613 reg_def XMM31i( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(8));
614 reg_def XMM31j( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(9));
615 reg_def XMM31k( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(10));
616 reg_def XMM31l( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(11));
617 reg_def XMM31m( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(12));
618 reg_def XMM31n( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(13));
619 reg_def XMM31o( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(14));
620 reg_def XMM31p( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(15));
621
622 #else // _WIN64
623
// Non-Windows branch (after "#else // _WIN64"): XMM6-XMM7 are fully
// volatile -- SOC both at call sites and on entry, matching the header
// comment "Linux ABI: No register preserved across function calls".
// Sixteen 32-bit slots per register, (a)-(p).
624 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
625 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
626 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
627 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
628 reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
629 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
630 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
631 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
632 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
633 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
634 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
635 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
636 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
637 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
638 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
639 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));
640 
641 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
642 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
643 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
644 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
645 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
646 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
647 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
648 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
649 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
650 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
651 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
652 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
653 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
654 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
655 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
656 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));
657
658 #ifdef _LP64
659
// 64-bit only (inside "#ifdef _LP64"): XMM8-XMM17, fully volatile in this
// non-Windows branch (SOC at call sites and on entry). Sixteen 32-bit
// slots per register, (a)-(p); slot k maps to xmmN->as_VMReg()->next(k).
660 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
661 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
662 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
663 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
664 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
665 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
666 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
667 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
668 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
669 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
670 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
671 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
672 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
673 reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
674 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
675 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));
676 
677 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
678 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
679 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
680 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
681 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
682 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
683 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
684 reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
685 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
686 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
687 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
688 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
689 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
690 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
691 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
692 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));
693 
694 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
695 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
696 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
697 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
698 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
699 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
700 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
701 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
702 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
703 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
704 reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
705 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
706 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
707 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
708 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
709 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));
710 
711 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
712 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
713 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
714 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
715 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
716 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
717 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
718 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
719 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
720 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
721 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
722 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
723 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
724 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
725 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
726 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));
727 
728 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
729 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
730 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
731 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
732 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
733 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
734 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
735 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
736 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
737 reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
738 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
739 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
740 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
741 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
742 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
743 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));
744 
745 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
746 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
747 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
748 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
749 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
750 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
751 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
752 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
753 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
754 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
755 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
756 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
757 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
758 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
759 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
760 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));
761 
762 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
763 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
764 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
765 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
766 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
767 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
768 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
769 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
770 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
771 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
772 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
773 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
774 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
775 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
776 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
777 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));
778 
779 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
780 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
781 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
782 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
783 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
784 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
785 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
786 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
787 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
788 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
789 reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
790 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
791 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
792 reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
793 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
794 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));
795 
796 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
797 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
798 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
799 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
800 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
801 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
802 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
803 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
804 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
805 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
806 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
807 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
808 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
809 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
810 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
811 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));
812 
813 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
814 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
815 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
816 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
817 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
818 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
819 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
820 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
821 reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
822 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
823 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
824 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
825 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
826 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
827 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
828 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));
829
830 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
831 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
832 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
833 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
834 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
835 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
836 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
837 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
838 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
839 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
840 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
841 reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
842 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
843 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
844 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
845 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));
846
847 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
848 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
849 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
850 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
851 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
852 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
853 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
854 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
855 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
856 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
857 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
858 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
859 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
860 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
861 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
862 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));
863
864 reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
865 reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
866 reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
867 reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
868 reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
869 reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
870 reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
871 reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
872 reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
873 reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
874 reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
875 reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
876 reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
877 reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
878 reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
879 reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));
880
881 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
882 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
883 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
884 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
885 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
886 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
887 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
888 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
889 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
890 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
891 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
892 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
893 reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
894 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
895 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
896 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));
897
898 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
899 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
900 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
901 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
902 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
903 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
904 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
905 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
906 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
907 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
908 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
909 reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
910 reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
911 reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
912 reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
913 reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));
914
915 reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
916 reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
917 reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
918 reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
919 reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
920 reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
921 reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
922 reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
923 reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
924 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
925 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
926 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
927 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
928 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
929 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
930 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));
931
932 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
933 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
934 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
935 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
936 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
937 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
938 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
939 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
940 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
941 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
942 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
943 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
944 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
945 reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
946 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
947 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));
948
949 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
950 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
951 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
952 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
953 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
954 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
955 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
956 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
957 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
958 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
959 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
960 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
961 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
962 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
963 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
964 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));
965
966 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
967 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
968 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
969 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
970 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
971 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
972 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
973 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
974 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
975 reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
976 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
977 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
978 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
979 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
980 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
981 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));
982
983 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
984 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
985 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
986 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
987 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
988 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
989 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
990 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
991 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
992 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
993 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
994 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
995 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
996 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
997 reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
998 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));
999
1000 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
1001 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
1002 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
1003 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
1004 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
1005 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
1006 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
1007 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
1008 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
1009 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
1010 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
1011 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
1012 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
1013 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
1014 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
1015 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));
1016
1017 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
1018 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
1019 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
1020 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
1021 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
1022 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
1023 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
1024 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
1025 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
1026 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
1027 reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
1028 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
1029 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
1030 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
1031 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
1032 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));
1033
1034 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
1035 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
1036 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
1037 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
1038 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
1039 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
1040 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
1041 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
1042 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
1043 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
1044 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
1045 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
1046 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
1047 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
1048 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
1049 reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));
1050
1051 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
1052 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
1053 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
1054 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
1055 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
1056 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
1057 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
1058 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
1059 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
1060 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
1061 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
1062 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
1063 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
1064 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
1065 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
1066 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));
1067
1068 #endif // _LP64
1069
1070 #endif // _WIN64
1071
// Condition-code (flags) register. The ideal-register-type slot is 0 and
// the VMReg mapping is VMRegImpl::Bad(): RFLAGS has no save/restore slot.
// NOTE(review): the encoding number (16 on LP64, 8 on 32-bit) appears to
// place RFLAGS immediately after the general-purpose registers defined
// earlier in this file -- confirm against the GPR reg_defs.
#ifdef _LP64
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
#else
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64
1077
// Allocation chunk for all XMM/vector register words. XMM8-XMM15 are added
// only in 64-bit builds, and XMM16-XMM31 (EVEX-only) are listed in the same
// _LP64 section; whether the upper bank may actually be allocated is decided
// by the reg_class_dynamic classes below, not here. Note the leading commas
// on the ",XMM8" and ",XMM16" lines: they continue the argument list across
// the preprocessor boundary so the non-LP64 list still ends without a comma.
alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                   XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                   XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                   XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                   XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                   XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                   XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                   XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                  ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                   XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                  ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                   XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                   XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                   );
1113
// flags allocation class should be last.
alloc_class chunk3(RFLAGS);

// Singleton class for condition codes: contains only the RFLAGS register.
reg_class int_flags(RFLAGS);
1119
// Class for pre evex float registers (one 32-bit slot per register;
// XMM0-XMM15 only, reachable without EVEX encoding)
reg_class float_reg_legacy(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15
#endif
                           );

// Class for evex float registers (adds the EVEX-only bank XMM16-XMM31)
reg_class float_reg_evex(XMM0,
                         XMM1,
                         XMM2,
                         XMM3,
                         XMM4,
                         XMM5,
                         XMM6,
                         XMM7
#ifdef _LP64
                        ,XMM8,
                         XMM9,
                         XMM10,
                         XMM11,
                         XMM12,
                         XMM13,
                         XMM14,
                         XMM15,
                         XMM16,
                         XMM17,
                         XMM18,
                         XMM19,
                         XMM20,
                         XMM21,
                         XMM22,
                         XMM23,
                         XMM24,
                         XMM25,
                         XMM26,
                         XMM27,
                         XMM28,
                         XMM29,
                         XMM30,
                         XMM31
#endif
                         );

// Pick the evex class when the CPU supports EVEX encoding, legacy otherwise.
reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
1179
// Class for pre evex double registers (two 32-bit slots, a-b, per register;
// XMM0-XMM15 only, reachable without EVEX encoding)
reg_class double_reg_legacy(XMM0, XMM0b,
                            XMM1, XMM1b,
                            XMM2, XMM2b,
                            XMM3, XMM3b,
                            XMM4, XMM4b,
                            XMM5, XMM5b,
                            XMM6, XMM6b,
                            XMM7, XMM7b
#ifdef _LP64
                           ,XMM8, XMM8b,
                            XMM9, XMM9b,
                            XMM10, XMM10b,
                            XMM11, XMM11b,
                            XMM12, XMM12b,
                            XMM13, XMM13b,
                            XMM14, XMM14b,
                            XMM15, XMM15b
#endif
                            );

// Class for evex double registers (adds the EVEX-only bank XMM16-XMM31)
reg_class double_reg_evex(XMM0, XMM0b,
                          XMM1, XMM1b,
                          XMM2, XMM2b,
                          XMM3, XMM3b,
                          XMM4, XMM4b,
                          XMM5, XMM5b,
                          XMM6, XMM6b,
                          XMM7, XMM7b
#ifdef _LP64
                         ,XMM8, XMM8b,
                          XMM9, XMM9b,
                          XMM10, XMM10b,
                          XMM11, XMM11b,
                          XMM12, XMM12b,
                          XMM13, XMM13b,
                          XMM14, XMM14b,
                          XMM15, XMM15b,
                          XMM16, XMM16b,
                          XMM17, XMM17b,
                          XMM18, XMM18b,
                          XMM19, XMM19b,
                          XMM20, XMM20b,
                          XMM21, XMM21b,
                          XMM22, XMM22b,
                          XMM23, XMM23b,
                          XMM24, XMM24b,
                          XMM25, XMM25b,
                          XMM26, XMM26b,
                          XMM27, XMM27b,
                          XMM28, XMM28b,
                          XMM29, XMM29b,
                          XMM30, XMM30b,
                          XMM31, XMM31b
#endif
                          );

// Pick the evex class when the CPU supports EVEX encoding, legacy otherwise.
reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
1239
// Class for pre evex 32bit vector registers (one slot per register;
// XMM0-XMM15 only, reachable without EVEX encoding)
reg_class vectors_reg_legacy(XMM0,
                             XMM1,
                             XMM2,
                             XMM3,
                             XMM4,
                             XMM5,
                             XMM6,
                             XMM7
#ifdef _LP64
                            ,XMM8,
                             XMM9,
                             XMM10,
                             XMM11,
                             XMM12,
                             XMM13,
                             XMM14,
                             XMM15
#endif
                             );

// Class for evex 32bit vector registers (adds the EVEX-only bank XMM16-XMM31)
reg_class vectors_reg_evex(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15,
                           XMM16,
                           XMM17,
                           XMM18,
                           XMM19,
                           XMM20,
                           XMM21,
                           XMM22,
                           XMM23,
                           XMM24,
                           XMM25,
                           XMM26,
                           XMM27,
                           XMM28,
                           XMM29,
                           XMM30,
                           XMM31
#endif
                           );

// Pick the evex class when the CPU supports EVEX encoding, legacy otherwise.
reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
1299
// Class for pre evex 64bit vector registers (two slots, a-b, per register;
// XMM0-XMM15 only, reachable without EVEX encoding)
reg_class vectord_reg_legacy(XMM0, XMM0b,
                             XMM1, XMM1b,
                             XMM2, XMM2b,
                             XMM3, XMM3b,
                             XMM4, XMM4b,
                             XMM5, XMM5b,
                             XMM6, XMM6b,
                             XMM7, XMM7b
#ifdef _LP64
                            ,XMM8, XMM8b,
                             XMM9, XMM9b,
                             XMM10, XMM10b,
                             XMM11, XMM11b,
                             XMM12, XMM12b,
                             XMM13, XMM13b,
                             XMM14, XMM14b,
                             XMM15, XMM15b
#endif
                             );

// Class for evex 64bit vector registers (adds the EVEX-only bank XMM16-XMM31)
reg_class vectord_reg_evex(XMM0, XMM0b,
                           XMM1, XMM1b,
                           XMM2, XMM2b,
                           XMM3, XMM3b,
                           XMM4, XMM4b,
                           XMM5, XMM5b,
                           XMM6, XMM6b,
                           XMM7, XMM7b
#ifdef _LP64
                          ,XMM8, XMM8b,
                           XMM9, XMM9b,
                           XMM10, XMM10b,
                           XMM11, XMM11b,
                           XMM12, XMM12b,
                           XMM13, XMM13b,
                           XMM14, XMM14b,
                           XMM15, XMM15b,
                           XMM16, XMM16b,
                           XMM17, XMM17b,
                           XMM18, XMM18b,
                           XMM19, XMM19b,
                           XMM20, XMM20b,
                           XMM21, XMM21b,
                           XMM22, XMM22b,
                           XMM23, XMM23b,
                           XMM24, XMM24b,
                           XMM25, XMM25b,
                           XMM26, XMM26b,
                           XMM27, XMM27b,
                           XMM28, XMM28b,
                           XMM29, XMM29b,
                           XMM30, XMM30b,
                           XMM31, XMM31b
#endif
                           );

// Pick the evex class when the CPU supports EVEX encoding, legacy otherwise.
reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
1359
// Class for pre evex 128bit vector registers (four slots, a-d, per register;
// XMM0-XMM15 only, reachable without EVEX encoding)
reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d,
                             XMM1, XMM1b, XMM1c, XMM1d,
                             XMM2, XMM2b, XMM2c, XMM2d,
                             XMM3, XMM3b, XMM3c, XMM3d,
                             XMM4, XMM4b, XMM4c, XMM4d,
                             XMM5, XMM5b, XMM5c, XMM5d,
                             XMM6, XMM6b, XMM6c, XMM6d,
                             XMM7, XMM7b, XMM7c, XMM7d
#ifdef _LP64
                            ,XMM8, XMM8b, XMM8c, XMM8d,
                             XMM9, XMM9b, XMM9c, XMM9d,
                             XMM10, XMM10b, XMM10c, XMM10d,
                             XMM11, XMM11b, XMM11c, XMM11d,
                             XMM12, XMM12b, XMM12c, XMM12d,
                             XMM13, XMM13b, XMM13c, XMM13d,
                             XMM14, XMM14b, XMM14c, XMM14d,
                             XMM15, XMM15b, XMM15c, XMM15d
#endif
                             );

// Class for evex 128bit vector registers (adds the EVEX-only bank XMM16-XMM31)
reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d,
                           XMM1, XMM1b, XMM1c, XMM1d,
                           XMM2, XMM2b, XMM2c, XMM2d,
                           XMM3, XMM3b, XMM3c, XMM3d,
                           XMM4, XMM4b, XMM4c, XMM4d,
                           XMM5, XMM5b, XMM5c, XMM5d,
                           XMM6, XMM6b, XMM6c, XMM6d,
                           XMM7, XMM7b, XMM7c, XMM7d
#ifdef _LP64
                          ,XMM8, XMM8b, XMM8c, XMM8d,
                           XMM9, XMM9b, XMM9c, XMM9d,
                           XMM10, XMM10b, XMM10c, XMM10d,
                           XMM11, XMM11b, XMM11c, XMM11d,
                           XMM12, XMM12b, XMM12c, XMM12d,
                           XMM13, XMM13b, XMM13c, XMM13d,
                           XMM14, XMM14b, XMM14c, XMM14d,
                           XMM15, XMM15b, XMM15c, XMM15d,
                           XMM16, XMM16b, XMM16c, XMM16d,
                           XMM17, XMM17b, XMM17c, XMM17d,
                           XMM18, XMM18b, XMM18c, XMM18d,
                           XMM19, XMM19b, XMM19c, XMM19d,
                           XMM20, XMM20b, XMM20c, XMM20d,
                           XMM21, XMM21b, XMM21c, XMM21d,
                           XMM22, XMM22b, XMM22c, XMM22d,
                           XMM23, XMM23b, XMM23c, XMM23d,
                           XMM24, XMM24b, XMM24c, XMM24d,
                           XMM25, XMM25b, XMM25c, XMM25d,
                           XMM26, XMM26b, XMM26c, XMM26d,
                           XMM27, XMM27b, XMM27c, XMM27d,
                           XMM28, XMM28b, XMM28c, XMM28d,
                           XMM29, XMM29b, XMM29c, XMM29d,
                           XMM30, XMM30b, XMM30c, XMM30d,
                           XMM31, XMM31b, XMM31c, XMM31d
#endif
                           );

// Pick the evex class when the CPU supports EVEX encoding, legacy otherwise.
reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
1419
// Class for pre evex 256bit vector registers (eight slots, a-h, per register;
// XMM0-XMM15 only, reachable without EVEX encoding)
reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
                             XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
                             XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
                             XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
                             XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                             XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                             XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                             XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
#ifdef _LP64
                            ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                             XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                             XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                             XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                             XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                             XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                             XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                             XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                             );

// Class for evex 256bit vector registers (adds the EVEX-only bank XMM16-XMM31)
reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
                           XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
                           XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
                           XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
                           XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                           XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                           XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                           XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
#ifdef _LP64
                          ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                           XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
                           XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
                           XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
                           XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h
#endif
                           );

// Pick the evex class when the CPU supports EVEX encoding, legacy otherwise.
reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
1479
// Class for all 512bit vector registers
// Each XMM register is listed as 16 32-bit slots (suffixes a..p), i.e. the
// full 512-bit ZMM width.  XMM8 and above exist only in the 64-bit VM;
// XMM16-XMM31 are the upper AVX-512 bank (same _LP64 block here).
reg_class vectorz_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                      XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                      XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                      XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                      XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                      XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                      XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                      XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                     ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                      XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                      XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                      XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                      XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                      XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                      XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                      XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                     ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                      XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                      XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                      XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                      XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                      XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                      XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                      XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                      XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                      XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                      XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                      XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                      XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                      XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                      XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                      XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                      );
1516
1517 %}
1518
1519
1520 //----------SOURCE BLOCK-------------------------------------------------------
1521 // This is a block of C++ code which provides values, functions, and
1522 // definitions necessary in the rest of the architecture description
1523
1524 source_hpp %{
1525 // Header information of the source block.
1526 // Method declarations/definitions which are used outside
1527 // the ad-scope can conveniently be defined here.
1528 //
1529 // To keep related declarations/definitions/uses close together,
1530 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
1531
1532 class NativeJump;
1533
1534 class CallStubImpl {
1535
1536 //--------------------------------------------------------------
// Addresses of the sign-manipulation constant pools used by the abs/neg
// float/double instructions below as andps/andpd/xorps/xorpd operands.
static address float_signflip() { return (address)float_signflip_pool; }
static address double_signmask() { return (address)double_signmask_pool; }
static address double_signflip() { return (address)double_signflip_pool; }
1651 #endif
1652
1653
1654 const bool Matcher::match_rule_supported(int opcode) {
1655 if (!has_match_rule(opcode))
1656 return false;
1657
1658 switch (opcode) {
1659 case Op_PopCountI:
1660 case Op_PopCountL:
1661 if (!UsePopCountInstruction)
1662 return false;
1663 break;
1664 case Op_MulVI:
1665 if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
1666 return false;
1667 break;
1668 case Op_MulVL:
1669 case Op_MulReductionVL:
1670 if (VM_Version::supports_avx512dq() == false)
1671 return false;
1672 case Op_AddReductionVL:
1673 if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here
1674 return false;
1675 case Op_AddReductionVI:
1676 if (UseSSE < 3) // requires at least SSE3
1677 return false;
1678 case Op_MulReductionVI:
1679 if (UseSSE < 4) // requires at least SSE4
1680 return false;
1681 case Op_AddReductionVF:
1682 case Op_AddReductionVD:
1683 case Op_MulReductionVF:
1684 case Op_MulReductionVD:
1685 if (UseSSE < 1) // requires at least SSE
1686 return false;
1687 break;
1688 case Op_CompareAndSwapL:
1689 #ifdef _LP64
1690 case Op_CompareAndSwapP:
1691 #endif
1692 if (!VM_Version::supports_cx8())
1693 return false;
1694 break;
1695 }
1696
1697 return true; // Per default match rules are supported.
1698 }
1699
1700 // Max vector size in bytes. 0 if not supported.
1701 const int Matcher::vector_width_in_bytes(BasicType bt) {
1702 assert(is_java_primitive(bt), "only primitive type vectors");
1703 if (UseSSE < 2) return 0;
1704 // SSE2 supports 128bit vectors for all types.
1705 // AVX2 supports 256bit vectors for all types.
1706 // AVX2/EVEX supports 512bit vectors for all types.
1707 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
1708 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
1709 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
1710 size = (UseAVX > 2) ? 64 : 32;
1711 // Use flag to limit vector size.
1712 size = MIN2(size,(int)MaxVectorSize);
1713 // Minimum 2 values in vector (or 4 for bytes).
1714 switch (bt) {
1715 case T_DOUBLE:
1716 case T_LONG:
1717 if (size < 16) return 0;
1718 case T_FLOAT:
1719 case T_INT:
1720 if (size < 8) return 0;
1721 case T_BOOLEAN:
1722 case T_BYTE:
1723 case T_CHAR:
1724 case T_SHORT:
1725 if (size < 4) return 0;
1726 break;
1727 default:
1728 ShouldNotReachHere();
1729 }
1730 return size;
1732
1733 // Limits on vector size (number of elements) loaded into vector.
1734 const int Matcher::max_vector_size(const BasicType bt) {
1735 return vector_width_in_bytes(bt)/type2aelembytes(bt);
1736 }
1737 const int Matcher::min_vector_size(const BasicType bt) {
1738 int max_size = max_vector_size(bt);
1739 // Min size which can be loaded into vector is 4 bytes.
1740 int size = (type2aelembytes(bt) == 1) ? 4 : 2;
1741 return MIN2(size,max_size);
1742 }
1743
1744 // Vector ideal reg corresponding to specidied size in bytes
1745 const int Matcher::vector_ideal_reg(int size) {
1746 assert(MaxVectorSize >= size, "");
1747 switch(size) {
1748 case 4: return Op_VecS;
1749 case 8: return Op_VecD;
1750 case 16: return Op_VecX;
1751 case 32: return Op_VecY;
1752 case 64: return Op_VecZ;
1753 }
1754 ShouldNotReachHere();
1755 return 0;
1756 }
1757
1758 // Only lowest bits of xmm reg are used for vector shift count.
1759 const int Matcher::vector_shift_count_ideal_reg(int size) {
1760 return Op_VecS;
1761 }
1762
1763 // x86 supports misaligned vectors store/load.
1764 const bool Matcher::misaligned_vectors_ok() {
1765 return !AlignVector; // can be changed by flag
1766 }
1767
1768 // x86 AES instructions are compatible with SunJCE expanded
1769 // keys, hence we do not need to pass the original key to stubs
1770 const bool Matcher::pass_original_key_for_aes() {
1771 return false;
1772 }
1776 int src_hi, int dst_hi, uint ireg, outputStream* st) {
1777 // In 64-bit VM size calculation is very complex. Emitting instructions
1778 // into scratch buffer is used to get size in 64-bit VM.
1779 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
1780 assert(ireg == Op_VecS || // 32bit vector
1781 (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
1782 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
1783 "no non-adjacent vector moves" );
1784 if (cbuf) {
1785 MacroAssembler _masm(cbuf);
1786 int offset = __ offset();
1787 switch (ireg) {
1788 case Op_VecS: // copy whole register
1789 case Op_VecD:
1790 case Op_VecX:
1791 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
1792 break;
1793 case Op_VecY:
1794 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
1795 break;
1796 case Op_VecZ:
1797 __ evmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
1798 break;
1799 default:
1800 ShouldNotReachHere();
1801 }
1802 int size = __ offset() - offset;
1803 #ifdef ASSERT
1804 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
1805 assert(!do_size || size == 4, "incorrect size calculattion");
1806 #endif
1807 return size;
1808 #ifndef PRODUCT
1809 } else if (!do_size) {
1810 switch (ireg) {
1811 case Op_VecS:
1812 case Op_VecD:
1813 case Op_VecX:
1814 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
1815 break;
1816 case Op_VecY:
1817 case Op_VecZ:
1818 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
1819 break;
1820 default:
1821 ShouldNotReachHere();
1822 }
1823 #endif
1824 }
1825 // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
1826 return (UseAVX > 2) ? 6 : 4;
1827 }
1828
// Spill or reload one vector register to/from the stack slot [rsp + stack_offset].
// is_load selects the direction; ireg (Op_VecS/D/X/Y/Z) selects the width and
// thus which move instruction is used.  Three modes:
//   cbuf != NULL           - emit the instruction, return its emitted size;
//   cbuf == NULL, !do_size - print the assembly to st (non-PRODUCT only);
//   do_size                - return the computed encoding size (32-bit VM only).
static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
                            int stack_offset, int reg, uint ireg, outputStream* st) {
  // In 64-bit VM size calculation is very complex. Emitting instructions
  // into scratch buffer is used to get size in 64-bit VM.
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    if (is_load) {
      switch (ireg) {
      case Op_VecS:  // 4 bytes through the low dword
        __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecD:  // 8 bytes
        __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecX:  // 16 bytes, unaligned
        __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecY:  // 32 bytes, VEX-encoded
        __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecZ:
        // Trailing 2 selects the 512-bit form — presumably the vector_len
        // encoding; confirm against MacroAssembler::evmovdqu.
        __ evmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecD:
        __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecX:
        __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecY:
        __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecZ:
        __ evmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
        break;
      default:
        ShouldNotReachHere();
      }
    }
    int size = __ offset() - offset;
#ifdef ASSERT
    // Cross-check the emitted size against the size-only formula below:
    // displacement adds 0, 1 or 4 bytes (6 with EVEX payload when UseAVX > 2).
    int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4);
    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    assert(!do_size || size == (5+offset_size), "incorrect size calculattion");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    // Printing mode: show the same instruction selection without emitting.
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecD:
        st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecX:
        st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecY:
      case Op_VecZ:
        st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecD:
        st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecX:
        st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecY:
      case Op_VecZ:
        st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      default:
        ShouldNotReachHere();
      }
    }
#endif
  }
  // Size-only path (32-bit VM): base instruction is 5 bytes plus displacement.
  int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4);
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return 5+offset_size;
}
1930
1931 static inline jfloat replicate4_imm(int con, int width) {
1932 // Load a constant of "width" (in bytes) and replicate it to fill 32bit.
1933 assert(width == 1 || width == 2, "only byte or short types here");
1934 int bit_width = width * 8;
1935 jint val = con;
1936 val &= (1 << bit_width) - 1; // mask off sign bits
1937 while(bit_width < 32) {
1938 val |= (val << bit_width);
1939 bit_width <<= 1;
1940 }
1941 jfloat fval = *((jfloat*) &val); // coerce to float type
1942 return fval;
1943 }
1944
1945 static inline jdouble replicate8_imm(int con, int width) {
1946 // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
2010 // Check that stack depth is unchanged: find majik cookie on stack
2011 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
2012 MacroAssembler _masm(&cbuf);
2013 Label L;
2014 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
2015 __ jccb(Assembler::equal, L);
2016 // Die if stack mismatch
2017 __ int3();
2018 __ bind(L);
2019 }
2020 %}
2021
2022 %}
2023
2024
2025 //----------OPERANDS-----------------------------------------------------------
2026 // Operand definitions must precede instruction definitions for correct parsing
2027 // in the ADLC because operands constitute user defined types which are used in
2028 // instruction definitions.
2029
// This one generically applies only for evex, so only one version
// Operand for a 512-bit vector value; allocates from vectorz_reg.
operand vecZ() %{
  constraint(ALLOC_IN_RC(vectorz_reg));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}
2038
2039 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)
2040
2041 // ============================================================================
2042
// Halt node: emits an int3 breakpoint so execution traps if an
// unreachable path is ever taken.
instruct ShouldNotReachHere() %{
  match(Halt);
  format %{ "int3\t# ShouldNotReachHere" %}
  ins_encode %{
    __ int3();
  %}
  ins_pipe(pipe_slow);
%}
2051
2052 // ============================================================================
2053
2054 instruct addF_reg(regF dst, regF src) %{
2055 predicate((UseSSE>=1) && (UseAVX == 0));
2056 match(Set dst (AddF dst src));
2057
2058 format %{ "addss $dst, $src" %}
2619 ins_pipe(pipe_slow);
2620 %}
2621
// Absolute value of float/double: AND with a sign-bit mask from the
// constant pool (see float_signmask/double_signmask).  The *_reg forms are
// the SSE-only (UseAVX == 0) destructive versions; the *_reg_reg forms are
// the AVX three-operand versions.
instruct absF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AbsF dst));
  ins_cost(150);
  format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absF_reg_reg(regF dst, regF src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    // vector_len 0: scalar/128-bit encoding.
    int vector_len = 0;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AbsD dst));
  ins_cost(150);
  format %{ "andpd $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg_reg(regD dst, regD src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
2671
// Negation of float/double: XOR with a sign-flip mask from the constant
// pool (see float_signflip/double_signflip).  Same SSE/AVX split as the
// abs instructions above.
instruct negF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (NegF dst));
  ins_cost(150);
  format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negF_reg_reg(regF dst, regF src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegF src));
  ins_cost(150);
  format %{ "vxorps $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    // vector_len 0: scalar/128-bit encoding.
    int vector_len = 0;
    __ vxorps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signflip()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (NegD dst));
  ins_cost(150);
  format %{ "xorpd $dst, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg_reg(regD dst, regD src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegD src));
  ins_cost(150);
  format %{ "vxorpd $dst, $src, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    int vector_len = 0;
    __ vxorpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signflip()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
2721
// Float sqrt: Java expresses it as (float)Math.sqrt((double)f); match the
// whole convert-sqrt-convert pattern and emit a single sqrtss.
instruct sqrtF_reg(regF dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
2733
2734 instruct sqrtF_mem(regF dst, memory src) %{
2735 predicate(UseSSE>=1);
2736 match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));
2737
2772
2773 format %{ "sqrtsd $dst, $src" %}
2774 ins_cost(150);
2775 ins_encode %{
2776 __ sqrtsd($dst$$XMMRegister, $src$$Address);
2777 %}
2778 ins_pipe(pipe_slow);
2779 %}
2780
// Double sqrt of a constant: materialize the constant in the constant
// table and use sqrtsd's memory operand directly.
instruct sqrtD_imm(regD dst, immD con) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD con));
  format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
2791
2792 // ====================VECTOR INSTRUCTIONS=====================================
2793
// Load vectors (4 bytes long)
// movd zero-extends the 4 bytes into the xmm register.
instruct loadV4(vecS dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movd $dst,$mem\t! load vector (4 bytes)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}
2805
2806 // Load vectors (8 bytes long)
2807 instruct loadV8(vecD dst, memory mem) %{
2808 predicate(n->as_LoadVector()->memory_size() == 8);
2809 match(Set dst (LoadVector mem));
2810 ins_cost(125);
2811 format %{ "movq $dst,$mem\t! load vector (8 bytes)" %}
2822 ins_cost(125);
2823 format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %}
2824 ins_encode %{
2825 __ movdqu($dst$$XMMRegister, $mem$$Address);
2826 %}
2827 ins_pipe( pipe_slow );
2828 %}
2829
// Load vectors (32 bytes long)
// Unaligned 256-bit load (VEX-encoded vmovdqu).
instruct loadV32(vecY dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 32);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (64 bytes long)
// Unaligned 512-bit EVEX load; vector_len 2 selects the 512-bit form.
instruct loadV64(vecZ dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 64);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdqu $dst k0,$mem\t! load vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdqu($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
2854
// Store vectors
// 4-byte vector store through the low dword of the xmm register.
instruct storeV4(memory mem, vecS src) %{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movd $mem,$src\t! store vector (4 bytes)" %}
  ins_encode %{
    __ movdl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
2866
2867 instruct storeV8(memory mem, vecD src) %{
2868 predicate(n->as_StoreVector()->memory_size() == 8);
2869 match(Set mem (StoreVector mem src));
2870 ins_cost(145);
2871 format %{ "movq $mem,$src\t! store vector (8 bytes)" %}
2872 ins_encode %{
2873 __ movq($mem$$Address, $src$$XMMRegister);
2874 %}
2880 match(Set mem (StoreVector mem src));
2881 ins_cost(145);
2882 format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %}
2883 ins_encode %{
2884 __ movdqu($mem$$Address, $src$$XMMRegister);
2885 %}
2886 ins_pipe( pipe_slow );
2887 %}
2888
// Unaligned 256-bit vector store (VEX-encoded vmovdqu).
instruct storeV32(memory mem, vecY src) %{
  predicate(n->as_StoreVector()->memory_size() == 32);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Unaligned 512-bit EVEX vector store; vector_len 2 selects the 512-bit form.
instruct storeV64(memory mem, vecZ src) %{
  predicate(n->as_StoreVector()->memory_size() == 64);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdqu $mem k0,$src\t! store vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdqu($mem$$Address, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
2911
// Replicate byte scalar to be vector
// Move the GPR byte into xmm, widen bytes to words (punpcklbw self),
// then broadcast the low word across the low 4 lanes (pshuflw 0x00).
instruct Repl4B(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}
2926
2927 instruct Repl8B(vecD dst, rRegI src) %{
2928 predicate(n->as_Vector()->length() == 8);
2929 match(Set dst (ReplicateB src));
2930 format %{ "movd $dst,$src\n\t"
2931 "punpcklbw $dst,$dst\n\t"
2955 %}
2956
// 32-byte byte replicate: build 16 copies in the low 128 bits, then
// mirror them into the upper 128-bit half (vinserti128h).
instruct Repl32B(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate32B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
2974
// 64-byte byte replicate: build 16 copies in the low 128 bits, mirror into
// the upper 128-bit half, then into the upper 256-bit half.
// (Fixed format typo: "replicate632B" -> "replicate32B", matching the
// wording of Repl64B_imm below.)
instruct Repl64B(vecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 64);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! lower replicate32B\n\t"
            "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate32B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
2994
// Replicate byte scalar immediate to be vector by loading from const table.
// replicate4_imm pre-replicates the byte across 32 bits at assembly time.
instruct Repl4B_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB con));
  format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1)));
  %}
  ins_pipe( pipe_slow );
%}
3005
// 8-byte immediate replicate: replicate8_imm builds the 64-bit pattern in
// the constant table; a single movq loads it.
instruct Repl8B_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
  %}
  ins_pipe( pipe_slow );
%}
3022 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
3023 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3024 %}
3025 ins_pipe( pipe_slow );
3026 %}
3027
// 32-byte immediate replicate: load the 8-byte pattern from the constant
// table, widen to 128 bits, then mirror into the upper 128-bit half.
// (Fixed format typo: "lreplicate32B" -> "replicate32B".)
instruct Repl32B_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate32B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
3041
// 64-byte immediate replicate: 8-byte constant-table pattern widened to
// 128 bits, then mirrored into the upper 128-bit and 256-bit halves.
instruct Repl64B_imm(vecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 64);
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! lower replicate32B($con)\n\t"
            "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate32B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
3057
// Replicate byte scalar zero to be vector
// Zero vectors up to 128 bits: pxor dst,dst clears the whole register.
instruct Repl4B_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate4B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8B_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate8B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16B_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate16B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}
3088
instruct Repl32B_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %}
  ins_encode %{
    // vpxor with vector_len 1 = 256-bit encoding.  (Previous comment
    // claimed vxorpd was used; the code emits vpxor.)
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}
3100
instruct Repl64B_zero(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 64);
  match(Set dst (ReplicateB zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate64B zero" %}
  ins_encode %{
    // vpxor with vector_len 2 = 512-bit encoding.  (Previous comment
    // claimed vxorpd was used; the code emits vpxor.)
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}
3112
// Replicate char/short (2 byte) scalar to be vector
// movd then pshuflw 0x00 broadcasts the low word across the low 4 lanes.
instruct Repl2S(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate2S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}
3125
3126 instruct Repl4S(vecD dst, rRegI src) %{
3127 predicate(n->as_Vector()->length() == 4);
3128 match(Set dst (ReplicateS src));
3148 %}
3149 ins_pipe( pipe_slow );
3150 %}
3151
// 16-short replicate: build 8 copies in the low 128 bits, then mirror
// them into the upper 128-bit half.
instruct Repl16S(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate16S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
3167
// 32-short replicate: build 8 copies in the low 128 bits, then mirror
// into the upper 128-bit and 256-bit halves.
instruct Repl32S(vecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! lower replicate16S\n\t"
            "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate16S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
3185
// Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
// replicate4_imm pre-replicates the short across 32 bits at assembly time.
instruct Repl2S_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS con));
  format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}
3196
3197 instruct Repl4S_imm(vecD dst, immI con) %{
3198 predicate(n->as_Vector()->length() == 4);
3199 match(Set dst (ReplicateS con));
3200 format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %}
3201 ins_encode %{
3202 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
3203 %}
3204 ins_pipe( fpu_reg_reg );
3205 %}
3213 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
3214 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3215 %}
3216 ins_pipe( pipe_slow );
3217 %}
3218
// 16 shorts from an immediate: load the pre-splatted 8-byte constant,
// widen to 128 bits, then duplicate into the upper 128-bit half.
instruct Repl16S_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 32 shorts from an immediate: as Repl16S_imm, then duplicate the low
// 256 bits into the upper 256 bits of the 512-bit register.
instruct Repl32S_imm(vecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! lower replicate16S($con)\n\t"
            "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate16S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
3248
// Replicate char/short (2 byte) scalar zero to be vector.
// Zeroing is done by xoring the destination with itself; the 256/512-bit
// variants use the VEX/EVEX-encoded vpxor with the appropriate vector_len.
instruct Repl2S_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate2S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate4S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8S_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate8S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16S_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %}
  ins_encode %{
    // vector_len 1 selects the 256-bit form (requires AVX2 for vpxor).
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl32S_zero(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateS zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate32S zero" %}
  ins_encode %{
    // vector_len 2 selects the 512-bit EVEX form.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}
3303
// Replicate integer (4 byte) scalar to be vector.
// Move the GPR into XMM, then broadcast dword 0 with pshufd imm 0x00.
instruct Repl2I(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}
3316
3317 instruct Repl4I(vecX dst, rRegI src) %{
3318 predicate(n->as_Vector()->length() == 4);
3319 match(Set dst (ReplicateI src));
3323 __ movdl($dst$$XMMRegister, $src$$Register);
3324 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3325 %}
3326 ins_pipe( pipe_slow );
3327 %}
3328
// Replicate an int from a GPR into all 8 lanes of a 256-bit vector:
// pshufd broadcasts within 128 bits, vinserti128h fills the upper half.
instruct Repl8I(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate an int into all 16 lanes of a 512-bit vector: as Repl8I,
// then duplicate the low 256 bits into the upper 256 bits.
instruct Repl16I(vecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\n\t"
            "vinserti128h $dst,$dst,$dst\t! lower replicate8I\n\t"
            "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate8I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
3358
// Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
// replicate8_imm pre-splats the 4-byte constant into an 8-byte pool entry.
instruct Repl2I_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
  %}
  ins_pipe( fpu_reg_reg );
%}

// 4 ints: 8-byte pool load, then widen to 128 bits with punpcklqdq.
instruct Repl4I_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t"
            "punpcklqdq $dst,$dst" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 8 ints: as Repl4I_imm, then fill the upper 128-bit half.
instruct Repl8I_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 16 ints: as Repl8I_imm, then fill the upper 256-bit half.
instruct Repl16I_imm(vecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate16I($con)\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\n\t"
            "vinserti64x4h $dst k0,$dst,$dst" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
3411
// Integer could be loaded into xmm register directly from memory.
// These variants fold the LoadI into the replicate, saving a GPR move.
instruct Repl2I_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\t! replicate4I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

// 8 ints from memory: broadcast within 128 bits, then fill upper half.
instruct Repl8I_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 16 ints from memory: as Repl8I_mem, then fill the upper 256 bits.
instruct Repl16I_mem(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\n\t"
            "vinserti128h $dst,$dst,$dst\t! lower replicate8I\n\t"
            "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate8I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
3466
// Replicate integer (4 byte) scalar zero to be vector.
instruct Repl2I_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate2I" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}
3477
// Zero a 4-int (128-bit) vector by xoring the register with itself.
// Fix: the format string had a stray ')' after "zero" ("replicate4I zero)"),
// inconsistent with every sibling rule's disassembly comment.
instruct Repl4I_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate4I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}
3487
// Zero an 8-int (256-bit) vector with the VEX-encoded vpxor.
instruct Repl8I_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %}
  ins_encode %{
    // vector_len 1 selects the 256-bit form (requires AVX2 for vpxor).
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}
3499
// Zero a 16-int (512-bit) vector with the EVEX-encoded vpxor.
instruct Repl16I_zero(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateI zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate16I zero" %}
  ins_encode %{
    // vector_len 2 selects the 512-bit EVEX form (the sibling 512-bit
    // zero rules say "EVEX", not "AVX2", for this case).
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}
3511
// Replicate long (8 byte) scalar to be vector.
// On LP64 the whole long fits in one GPR, so a single movdq suffices.
#ifdef _LP64
instruct Repl2L(vecX dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 4 longs: as Repl2L, then duplicate into the upper 128-bit half.
instruct Repl4L(vecY dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 8 longs: as Repl4L, then duplicate the low 256 bits into the upper 256.
instruct Repl8L(vecZ dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! lower replicate4L\n\t"
            "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate4L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
// 32-bit VM: a long lives in a GPR pair, so assemble the 64-bit value in
// XMM from the lo/hi halves (movdl + punpckldq) before splatting.
instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L"%}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 4 longs on a 32-bit VM: as Repl2L, then fill the upper 128-bit half.
instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
3591
// 8 longs on a 32-bit VM: assemble the 64-bit value from the GPR pair,
// splat to 128 bits, then widen to 256 and 512 bits.
// Fixes: the predicate tested length() == 4 (duplicating Repl4L, so this
// rule could never match a length-8 vector), and the format string omitted
// the punpcklqdq step that the encoding actually emits.
instruct Repl8L(vecZ dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! lower replicate4L\n\t"
            "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate4L" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
3611 #endif // _LP64
3612
// Replicate long (8 byte) scalar immediate to be vector by loading from const table.
instruct Repl2L_imm(vecX dst, immL con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 4 longs from an immediate: as Repl2L_imm, then fill the upper 128 bits.
instruct Repl4L_imm(vecY dst, immL con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 8 longs from an immediate: as Repl4L_imm, then fill the upper 256 bits.
instruct Repl8L_imm(vecZ dst, immL con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! lower replicate4L($con)\n\t"
            "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate4L($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
3655
// Long could be loaded into xmm register directly from memory.
// These variants fold the LoadL into the replicate.
instruct Repl2L_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "movq $dst,$mem\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 4 longs from memory: as Repl2L_mem, then fill the upper 128 bits.
instruct Repl4L_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "movq $dst,$mem\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 8 longs from memory: as Repl4L_mem, then fill the upper 256 bits.
instruct Repl8L_mem(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "movq $dst,$mem\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! lower replicate4L\n\t"
            "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate4L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
3698
// Replicate long (8 byte) scalar zero to be vector.
instruct Repl2L_zero(vecX dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL zero));
  format %{ "pxor $dst,$dst\t! replicate2L zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4L_zero(vecY dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %}
  ins_encode %{
    // vector_len 1 selects the 256-bit form (requires AVX2 for vpxor).
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8L_zero(vecZ dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateL zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate8L zero" %}
  ins_encode %{
    // vector_len 2 selects the 512-bit EVEX form.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}
3733
// Replicate float (4 byte) scalar to be vector.
// A single pshufd broadcasts lane 0 of $src into $dst.
// Fix: the format string printed "pshufd $dst,$dst" although the encoding
// reads $src (compare Repl8F, which correctly prints $src) — the
// disassembly comment showed the wrong source operand.
instruct Repl2F(vecD dst, regF src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\t! replicate2F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}
3744
// Replicate a float into all 4 lanes of a 128-bit vector with one pshufd.
// Fix: format printed "pshufd $dst,$dst" while the encoding reads $src
// (Repl8F prints $src) — corrected the disassembly comment.
instruct Repl4F(vecX dst, regF src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\t! replicate4F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}
3754
// Replicate a float into all 8 lanes of a 256-bit vector: broadcast
// within 128 bits, then duplicate into the upper half.
instruct Repl8F(vecY dst, regF src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
3766
// Replicate a float into all 16 lanes of a 512-bit vector: as Repl8F,
// then duplicate the low 256 bits into the upper 256 bits.
// Fix: the format string labeled the vinsertf64x4h step "lower replicate8F"
// — it fills the UPPER half, as every sibling rule's comment says.
instruct Repl16F(vecZ dst, regF src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\n\t"
            "vinsertf128h $dst,$dst,$dst\t! lower replicate8F\n\t"
            "vinsertf64x4h $dst k0,$dst,$dst\t! upper replicate8F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
    __ vinsertf64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
3780
// Replicate float (4 byte) scalar zero to be vector.
// xorps is used for the float flavor (stays in the FP domain).
instruct Repl2F_zero(vecD dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF zero));
  format %{ "xorps $dst,$dst\t! replicate2F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F_zero(vecX dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF zero));
  format %{ "xorps $dst,$dst\t! replicate4F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8F_zero(vecY dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateF zero));
  format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %}
  ins_encode %{
    // vector_len 1 selects the 256-bit VEX form.
    int vector_len = 1;
    __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16F_zero(vecZ dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateF zero));
  format %{ "vxorps $dst k0,$dst,$dst\t! replicate16F zero" %}
  ins_encode %{
    // vector_len 2 selects the 512-bit EVEX form.
    int vector_len = 2;
    __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}
3823
// Replicate double (8 bytes) scalar to be vector.
// pshufd imm 0x44 = lanes {1,0,1,0}, i.e. copy the low quadword of $src
// into both quadwords of $dst.
instruct Repl2D(vecX dst, regD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\t! replicate2D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
  %}
  ins_pipe( pipe_slow );
%}

// 4 doubles: as Repl2D, then duplicate into the upper 128-bit half.
instruct Repl4D(vecY dst, regD src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 8 doubles: as Repl4D, then duplicate the low 256 bits into the upper 256.
instruct Repl8D(vecZ dst, regD src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\n\t"
            "vinsertf128h $dst,$dst,$dst\t! lower replicate4D\n\t"
            "vinsertf64x4h $dst k0,$dst,$dst\t! upper replicate4D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
    __ vinsertf64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
3860
// Replicate double (8 byte) scalar zero to be vector.
// xorpd/vxorpd keep the operation in the double FP domain.
instruct Repl2D_zero(vecX dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD zero));
  format %{ "xorpd $dst,$dst\t! replicate2D zero" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4D_zero(vecY dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateD zero));
  format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %}
  ins_encode %{
    // vector_len 1 selects the 256-bit VEX form.
    int vector_len = 1;
    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8D_zero(vecZ dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateD zero));
  format %{ "vxorpd $dst k0,$dst,$dst,vect512\t! replicate8D zero" %}
  ins_encode %{
    // vector_len 2 selects the 512-bit EVEX form.
    int vector_len = 2;
    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}
3893
// ====================REDUCTION ARITHMETIC=======================================

// Add-reduce a 2-int vector into a scalar (SSE3+, no AVX):
// horizontal add folds the two lanes, then the scalar src1 is added in
// and the result moved back to a GPR.
instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseSSE > 2 && UseAVX == 0);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp2, TEMP tmp);
  format %{ "movdqu $tmp2,$src2\n\t"
            "phaddd $tmp2,$tmp2\n\t"
            "movd $tmp,$src1\n\t"
            "paddd $tmp,$tmp2\n\t"
            "movd $dst,$tmp\t! add reduction2I" %}
  ins_encode %{
    __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
3914
// Add-reduce a 2-int vector (AVX1/AVX2): vphaddd folds the two lanes,
// then the scalar src1 is added and the result moved to a GPR.
instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0 && UseAVX < 3);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vphaddd $tmp,$src2,$src2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! add reduction2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX-512 flavor: avoids phaddd; shuffles lane 1 down next to lane 0 and
// uses plain vpaddd, which is faster on EVEX-capable hardware.
instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0x1\n\t"
            "vpaddd $tmp,$src2,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! add reduction2I" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
3952
// Add-reduce a 4-int vector into a scalar (SSE3+, no AVX): two phaddd
// passes fold 4 lanes -> 2 -> 1, then src1 is added in.
instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseSSE > 2 && UseAVX == 0);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp2, TEMP tmp);
  format %{ "movdqu $tmp2,$src2\n\t"
            "phaddd $tmp2,$tmp2\n\t"
            "phaddd $tmp2,$tmp2\n\t"
            "movd $tmp,$src1\n\t"
            "paddd $tmp,$tmp2\n\t"
            "movd $dst,$tmp\t! add reduction4I" %}
  ins_encode %{
    __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
    __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX1/AVX2 flavor: two vphaddd passes fold the 4 lanes, then src1 is added.
instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0 && UseAVX < 3);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vphaddd $tmp,$src2,$src2\n\t"
            "vphaddd $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! add reduction4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX-512 flavor: log2 shuffle-and-add tree (0xE folds the high pair down,
// 0x1 folds the remaining pair), then src1 is added.
instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "vpaddd $tmp,$src2,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! add reduction4I" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
4017
// Add-reduce an 8-int (256-bit) vector on AVX1/AVX2 (UseAVX in (0,3)) using
// horizontal adds: dst = src1 + sum(src2[0..7]).
instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0 && UseAVX < 3);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vphaddd $tmp,$src2,$src2\n\t"
            "vphaddd $tmp,$tmp,$tmp2\n\t"
            "vextracti128 $tmp2,$tmp\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! add reduction8I" %}
  ins_encode %{
    int vector_len = 1;
    // Two horizontal adds collapse each 128-bit lane of src2 to a single sum.
    __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
    // NOTE(review): $tmp2 is read here before being written; only the result
    // lanes derived from $tmp are consumed below — confirm intentional.
    __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    // Add the upper 128-bit lane's sum onto the lower lane.
    __ vextracti128h($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    // Fold in the scalar accumulator src1.
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
4041
// Add-reduce an 8-int (256-bit) vector with EVEX encodings:
// fold 256->128 bits, then 128->64->32, then add the scalar src1.
instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti128 $tmp,$src2\n\t"
            "vpaddd $tmp,$tmp,$src2\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! add reduction8I" %}
  ins_encode %{
    int vector_len = 0;
    // Upper 128 bits of src2 added onto the lower 128 bits.
    __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    // Collapse the remaining four lanes pairwise (0xE then 0x1 shuffles).
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    // Fold in the scalar accumulator src1.
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
4069
// Add-reduce a 16-int (512-bit) vector with EVEX encodings:
// fold 512->256->128 bits with vector adds, collapse the remaining four
// lanes with pshufd/vpaddd, then add the scalar accumulator src1.
// FIX: the format's trailing debug text said "mul reduction16I" although
// this instruct matches AddReductionVI (cf. sibling add-reduction blocks).
instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vextracti64x4 $tmp3,$src2\n\t"
            "vpaddd $tmp3,$tmp3,$src2\n\t"
            "vextracti128 $tmp,$tmp3\n\t"
            "vpaddd $tmp,$tmp,$tmp3\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! add reduction16I" %}
  ins_encode %{
    // 512 -> 256: add the upper 256 bits of src2 onto the lower half.
    __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister);
    __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
    // 256 -> 128: add the upper 128-bit lane onto the lower.
    __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
    // Collapse the remaining four lanes pairwise.
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    // Fold in the scalar accumulator src1.
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
4100
4101 #ifdef _LP64
// Add-reduce a 2-long (128-bit) vector: dst = src1 + src2[0] + src2[1].
instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "vpaddq $tmp,$src2,$tmp2\n\t"
            "movdq $tmp2,$src1\n\t"
            "vpaddq $tmp2,$tmp,$tmp2\n\t"
            "movdq $dst,$tmp2\t! add reduction2L" %}
  ins_encode %{
    // shuffle 0xE brings the high quadword of src2 down to the low quadword
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpaddq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0);
    // fold in the scalar accumulator src1
    __ movdq($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
4120
// Add-reduce a 4-long (256-bit) vector: fold 256->128 bits, then the
// remaining quadword pair, then add the scalar accumulator src1.
instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti64x2 $tmp,$src2, 0x1\n\t"
            "vpaddq $tmp2,$tmp,$src2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "movdq $tmp,$src1\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "movdq $dst,$tmp2\t! add reduction4L" %}
  ins_encode %{
    // Upper 128-bit lane added onto the lower lane.
    __ vextracti64x2h($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
    // High quadword added onto the low quadword.
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    // Fold in the scalar accumulator src1.
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
4143
// Add-reduce an 8-long (512-bit) vector: fold 512->256->128 bits, then the
// last quadword pair, then add the scalar accumulator src1.
instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti64x4 $tmp2,$src2\n\t"
            "vpaddq $tmp2,$tmp2,$src2\n\t"
            "vextracti128 $tmp,$tmp2\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "movdq $tmp,$src1\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "movdq $dst,$tmp2\t! add reduction8L" %}
  ins_encode %{
    // 512 -> 256: upper 256 bits added onto the lower half.
    __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
    // 256 -> 128: upper lane added onto the lower lane.
    __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    // High quadword added onto the low quadword.
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    // Fold in the scalar accumulator src1.
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
4170 #endif
4171
// SSE-only add-reduce of a 2-float vector: dst = src1 + src2[0] + src2[1].
// Scalar addss is used, so the additions happen in strict lane order.
instruct rsadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (AddReductionVF src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "movdqu $tmp,$src1\n\t"
            "addss $tmp,$src2\n\t"
            "pshufd $tmp2,$src2,0x01\n\t"
            "addss $tmp,$tmp2\n\t"
            "movdqu $dst,$tmp\t! add reduction2F" %}
  ins_encode %{
    __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
    __ addss($tmp$$XMMRegister, $src2$$XMMRegister);
    // shuffle 0x01 brings element 1 down to lane 0
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
4190
4191 instruct rvadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
4275 ins_encode %{
4276 __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
4277 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
4278 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4279 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
4280 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4281 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
4282 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4283 __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
4284 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4285 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
4286 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4287 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
4288 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4289 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
4290 __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4291 %}
4292 ins_pipe( pipe_slow );
4293 %}
4294
// Add-reduce a 16-float (512-bit) vector in strict lane order:
// dst = src1 + src2[0] + src2[1] + ... + src2[15].
// Scalar vaddss keeps IEEE ordering; each 128-bit sub-vector is extracted
// with vextractf32x4 and its four lanes folded via pshufd shuffles.
// FIX: the format strings said "vextractf64x2" although the encode emits
// vextractf32x4h (float 32x4 extract, cf. rvmul16F_reduction_reg's format).
instruct radd16F_reduction_reg(regF dst, regF src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVF src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vaddss $tmp2,$src1,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vaddss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vaddss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vaddss $tmp2,$tmp2,$tmp\n\t"
            "vextractf32x4 $tmp3,$src2, 0x1\n\t"
            "vaddss $tmp2,$tmp2,$tmp3\n\t"
            "pshufd $tmp,$tmp3,0x01\n\t"
            "vaddss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp3,0x02\n\t"
            "vaddss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp3,0x03\n\t"
            "vaddss $tmp2,$tmp2,$tmp\n\t"
            "vextractf32x4 $tmp3,$src2, 0x2\n\t"
            "vaddss $tmp2,$tmp2,$tmp3\n\t"
            "pshufd $tmp,$tmp3,0x01\n\t"
            "vaddss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp3,0x02\n\t"
            "vaddss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp3,0x03\n\t"
            "vaddss $tmp2,$tmp2,$tmp\n\t"
            "vextractf32x4 $tmp3,$src2, 0x3\n\t"
            "vaddss $tmp2,$tmp2,$tmp3\n\t"
            "pshufd $tmp,$tmp3,0x01\n\t"
            "vaddss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp3,0x02\n\t"
            "vaddss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp3,0x03\n\t"
            "vaddss $dst,$tmp2,$tmp\t! add reduction16F" %}
  ins_encode %{
    // Lanes 0..3 come straight from src2's low 128 bits.
    __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    // Lanes 4..7 from the second 128-bit quarter of src2.
    __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    // Lanes 8..11 from the third quarter.
    __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    // Lanes 12..15 from the top quarter; final vaddss writes dst.
    __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
4365
// SSE-only add-reduce of a 2-double vector: dst = src1 + src2[0] + src2[1].
// dst is a TEMP because it is written (pshufd) before the final addsd.
instruct rsadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (AddReductionVD src1 src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "movdqu $tmp,$src1\n\t"
            "addsd $tmp,$src2\n\t"
            "pshufd $dst,$src2,0xE\n\t"
            "addsd $dst,$tmp\t! add reduction2D" %}
  ins_encode %{
    __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
    __ addsd($tmp$$XMMRegister, $src2$$XMMRegister);
    // shuffle 0xE brings the high double of src2 down to the low lane
    __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ addsd($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
4382
4383 instruct rvadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{
4384 predicate(UseAVX > 0);
4385 match(Set dst (AddReductionVD src1 src2));
4393 __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4394 %}
4395 ins_pipe( pipe_slow );
4396 %}
4397
// AVX add-reduce of a 4-double (256-bit) vector in strict lane order:
// dst = src1 + src2[0] + src2[1] + src2[2] + src2[3] (scalar vaddsd).
instruct rvadd4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVD src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vaddsd $tmp2,$src1,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vaddsd $tmp2,$tmp2,$tmp\n\t"
            "vextractf128 $tmp3,$src2\n\t"
            "vaddsd $tmp2,$tmp2,$tmp3\n\t"
            "pshufd $tmp,$tmp3,0xE\n\t"
            "vaddsd $dst,$tmp2,$tmp\t! add reduction4D" %}
  ins_encode %{
    // Lanes 0 and 1 from the low 128 bits of src2.
    __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    // Lanes 2 and 3 from the extracted upper 128 bits.
    __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
4420
// EVEX add-reduce of an 8-double (512-bit) vector in strict lane order:
// each 128-bit pair is extracted with vextractf64x2 and folded with vaddsd.
instruct rvadd8D_reduction_reg(regD dst, regD src1, vecZ src2, regD tmp, regD tmp2, regD tmp3) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVD src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vaddsd $tmp2,$src1,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vaddsd $tmp2,$tmp2,$tmp\n\t"
            "vextractf64x2 $tmp3,$src2, 0x1\n\t"
            "vaddsd $tmp2,$tmp2,$tmp3\n\t"
            "pshufd $tmp,$tmp3,0xE\n\t"
            "vaddsd $tmp2,$tmp2,$tmp\n\t"
            "vextractf64x2 $tmp3,$src2, 0x2\n\t"
            "vaddsd $tmp2,$tmp2,$tmp3\n\t"
            "pshufd $tmp,$tmp3,0xE\n\t"
            "vaddsd $tmp2,$tmp2,$tmp\n\t"
            "vextractf64x2 $tmp3,$src2, 0x3\n\t"
            "vaddsd $tmp2,$tmp2,$tmp3\n\t"
            "pshufd $tmp,$tmp3,0xE\n\t"
            "vaddsd $dst,$tmp2,$tmp\t! add reduction8D" %}
  ins_encode %{
    // Lanes 0,1 from the low 128 bits of src2.
    __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    // Lanes 2,3.
    __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    // Lanes 4,5.
    __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    // Lanes 6,7; final vaddsd writes dst.
    __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
4459
// SSE4.1 multiply-reduce of a 2-int vector: dst = src1 * src2[0] * src2[1].
// pmulld requires SSE4.1, hence the UseSSE > 3 predicate.
instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseSSE > 3 && UseAVX == 0);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0x1\n\t"
            "pmulld $tmp2,$src2\n\t"
            "movd $tmp,$src1\n\t"
            "pmulld $tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! mul reduction2I" %}
  ins_encode %{
    // shuffle 0x1 brings element 1 down to lane 0
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister);
    // fold in the scalar accumulator src1
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
4478
// AVX multiply-reduce of a 2-int vector: dst = src1 * src2[0] * src2[1].
instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0x1\n\t"
            "vpmulld $tmp,$src2,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpmulld $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! mul reduction2I" %}
  ins_encode %{
    int vector_len = 0;
    // shuffle 0x1 brings element 1 down to lane 0
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    // fold in the scalar accumulator src1
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
4498
// SSE4.1 multiply-reduce of a 4-int (128-bit) vector:
// dst = src1 * src2[0] * src2[1] * src2[2] * src2[3].
instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseSSE > 3 && UseAVX == 0);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "pmulld $tmp2,$src2\n\t"
            "pshufd $tmp,$tmp2,0x1\n\t"
            "pmulld $tmp2,$tmp\n\t"
            "movd $tmp,$src1\n\t"
            "pmulld $tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! mul reduction4I" %}
  ins_encode %{
    // Fold upper pair onto lower pair (0xE), then lane 1 onto lane 0 (0x1).
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1);
    __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
    // Fold in the scalar accumulator src1.
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
4521
// AVX multiply-reduce of a 4-int (128-bit) vector:
// dst = src1 * src2[0] * src2[1] * src2[2] * src2[3].
instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "vpmulld $tmp,$src2,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpmulld $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpmulld $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! mul reduction4I" %}
  ins_encode %{
    int vector_len = 0;
    // Fold upper pair onto lower pair (0xE), then lane 1 onto lane 0 (0x1).
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    // Fold in the scalar accumulator src1.
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
4545
// AVX multiply-reduce of an 8-int (256-bit) vector: fold 256->128 bits,
// then 128->64->32, then multiply in the scalar accumulator src1.
instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti128 $tmp,$src2\n\t"
            "vpmulld $tmp,$tmp,$src2\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpmulld $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpmulld $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpmulld $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! mul reduction8I" %}
  ins_encode %{
    int vector_len = 0;
    // Upper 128-bit lane multiplied onto the lower lane.
    __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    // Collapse the remaining four lanes pairwise.
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    // Fold in the scalar accumulator src1.
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
4573
// EVEX multiply-reduce of a 16-int (512-bit) vector: fold 512->256->128
// bits with vector multiplies, collapse the last four lanes, then multiply
// in the scalar accumulator src1.
// FIX: the fourth format line said "vpmulld $tmp,$tmp,$src2" but the encode
// multiplies $tmp by $tmp3 (the folded 256-bit half) — debug text corrected.
instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vextracti64x4 $tmp3,$src2\n\t"
            "vpmulld $tmp3,$tmp3,$src2\n\t"
            "vextracti128 $tmp,$tmp3\n\t"
            "vpmulld $tmp,$tmp,$tmp3\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpmulld $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpmulld $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpmulld $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! mul reduction16I" %}
  ins_encode %{
    // 512 -> 256: multiply the upper 256 bits of src2 onto the lower half.
    __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister);
    __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
    // 256 -> 128: multiply the upper 128-bit lane onto the lower.
    __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
    // Collapse the remaining four lanes pairwise.
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    // Fold in the scalar accumulator src1.
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
4604
4605 #ifdef _LP64
// Multiply-reduce a 2-long vector: dst = src1 * src2[0] * src2[1].
// vpmullq needs AVX-512DQ, hence the supports_avx512dq() predicate.
instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "vpmullq $tmp,$src2,$tmp2\n\t"
            "movdq $tmp2,$src1\n\t"
            "vpmullq $tmp2,$tmp,$tmp2\n\t"
            "movdq $dst,$tmp2\t! mul reduction2L" %}
  ins_encode %{
    // shuffle 0xE brings the high quadword down to the low quadword
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpmullq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0);
    // fold in the scalar accumulator src1
    __ movdq($tmp2$$XMMRegister, $src1$$Register);
    __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
4624
// Multiply-reduce a 4-long (256-bit) vector with AVX-512DQ vpmullq:
// fold 256->128 bits, then the quadword pair, then multiply in src1.
instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti64x2 $tmp,$src2, 0x1\n\t"
            "vpmullq $tmp2,$tmp,$src2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vpmullq $tmp2,$tmp2,$tmp\n\t"
            "movdq $tmp,$src1\n\t"
            "vpmullq $tmp2,$tmp2,$tmp\n\t"
            "movdq $dst,$tmp2\t! mul reduction4L" %}
  ins_encode %{
    // Upper 128-bit lane multiplied onto the lower lane.
    __ vextracti64x2h($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
    // High quadword multiplied onto the low quadword.
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    // Fold in the scalar accumulator src1.
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
4647
// Multiply-reduce an 8-long (512-bit) vector with AVX-512DQ vpmullq:
// fold 512->256->128 bits, then the last quadword pair, then multiply in src1.
instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti64x4 $tmp2,$src2\n\t"
            "vpmullq $tmp2,$tmp2,$src2\n\t"
            "vextracti128 $tmp,$tmp2\n\t"
            "vpmullq $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vpmullq $tmp2,$tmp2,$tmp\n\t"
            "movdq $tmp,$src1\n\t"
            "vpmullq $tmp2,$tmp2,$tmp\n\t"
            "movdq $dst,$tmp2\t! mul reduction8L" %}
  ins_encode %{
    // 512 -> 256.
    __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
    // 256 -> 128.
    __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    // High quadword multiplied onto the low quadword.
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    // Fold in the scalar accumulator src1.
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
4674 #endif
4675
// SSE-only multiply-reduce of a 2-float vector: dst = src1 * src2[0] * src2[1].
// Scalar mulss keeps the multiplications in strict lane order.
instruct rsmul2F_reduction(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "movdqu $tmp,$src1\n\t"
            "mulss $tmp,$src2\n\t"
            "pshufd $tmp2,$src2,0x01\n\t"
            "mulss $tmp,$tmp2\n\t"
            "movdqu $dst,$tmp\t! mul reduction2F" %}
  ins_encode %{
    __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
    __ mulss($tmp$$XMMRegister, $src2$$XMMRegister);
    // shuffle 0x01 brings element 1 down to lane 0
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
4694
// AVX multiply-reduce of a 2-float vector: dst = src1 * src2[0] * src2[1].
instruct rvmul2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vmulss $tmp2,$src1,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $dst,$tmp2,$tmp\t! mul reduction2F" %}
  ins_encode %{
    __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
    // shuffle 0x01 brings element 1 down to lane 0
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
4709
// SSE-only multiply-reduce of a 4-float (128-bit) vector in strict lane
// order: dst = src1 * src2[0] * src2[1] * src2[2] * src2[3].
instruct rsmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "movdqu $tmp,$src1\n\t"
            "mulss $tmp,$src2\n\t"
            "pshufd $tmp2,$src2,0x01\n\t"
            "mulss $tmp,$tmp2\n\t"
            "pshufd $tmp2,$src2,0x02\n\t"
            "mulss $tmp,$tmp2\n\t"
            "pshufd $tmp2,$src2,0x03\n\t"
            "mulss $tmp,$tmp2\n\t"
            "movdqu $dst,$tmp\t! mul reduction4F" %}
  ins_encode %{
    __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
    __ mulss($tmp$$XMMRegister, $src2$$XMMRegister);
    // shuffle immediates 0x01/0x02/0x03 bring lanes 1, 2, 3 down to lane 0
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
4736
// AVX multiply-reduce of a 4-float (128-bit) vector in strict lane order:
// dst = src1 * src2[0] * src2[1] * src2[2] * src2[3].
instruct rvmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vmulss $tmp2,$src1,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vmulss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vmulss $dst,$tmp2,$tmp\t! mul reduction4F" %}
  ins_encode %{
    __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
    // shuffle immediates 0x01/0x02/0x03 bring lanes 1, 2, 3 down to lane 0
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
4759
4760 instruct rvmul8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{
4761 predicate(UseAVX > 0);
4762 match(Set dst (MulReductionVF src1 src2));
4763 effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
4764 format %{ "vmulss $tmp2,$src1,$src2\n\t"
4765 "pshufd $tmp,$src2,0x01\n\t"
4766 "vmulss $tmp2,$tmp2,$tmp\n\t"
4767 "pshufd $tmp,$src2,0x02\n\t"
4779 ins_encode %{
4780 __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
4781 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
4782 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4783 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
4784 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4785 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
4786 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4787 __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
4788 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4789 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
4790 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4791 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
4792 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4793 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
4794 __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4795 %}
4796 ins_pipe( pipe_slow );
4797 %}
4798
// EVEX multiply-reduce of a 16-float (512-bit) vector in strict lane order:
// dst = src1 * src2[0] * ... * src2[15]. Each 128-bit quarter is extracted
// with vextractf32x4 and its four lanes folded via pshufd/vmulss.
instruct rvmul16F_reduction_reg(regF dst, regF src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vmulss $tmp2,$src1,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vmulss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vmulss $tmp2,$tmp2,$tmp\n\t"
            "vextractf32x4 $tmp3,$src2, 0x1\n\t"
            "vmulss $tmp2,$tmp2,$tmp3\n\t"
            "pshufd $tmp,$tmp3,0x01\n\t"
            "vmulss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp3,0x02\n\t"
            "vmulss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp3,0x03\n\t"
            "vmulss $tmp2,$tmp2,$tmp\n\t"
            "vextractf32x4 $tmp3,$src2, 0x2\n\t"
            "vmulss $tmp2,$tmp2,$tmp3\n\t"
            "pshufd $tmp,$tmp3,0x01\n\t"
            "vmulss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp3,0x02\n\t"
            "vmulss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp3,0x03\n\t"
            "vmulss $tmp2,$tmp2,$tmp\n\t"
            "vextractf32x4 $tmp3,$src2, 0x3\n\t"
            "vmulss $tmp2,$tmp2,$tmp3\n\t"
            "pshufd $tmp,$tmp3,0x01\n\t"
            "vmulss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp3,0x02\n\t"
            "vmulss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp3,0x03\n\t"
            "vmulss $dst,$tmp2,$tmp\t! mul reduction16F" %}
  ins_encode %{
    // Lanes 0..3 from the low 128 bits of src2.
    __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    // Lanes 4..7.
    __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    // Lanes 8..11.
    __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    // Lanes 12..15; final vmulss writes dst.
    __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
4869
// Scalar-SSE multiply reduction over a 2-element double vector:
//   dst = src1 * src2[0] * src2[1].
// Matches only when AVX is disabled (UseAVX == 0), so the destructive
// two-operand SSE forms (movdqu/mulsd/pshufd) are used.
// NOTE(review): movdqu/mulsd/pshufd are SSE2 instructions; the
// "UseSSE >= 1" predicate presumably relies on the platform's SSE2
// baseline -- confirm against the flag defaults for this port.
4870 instruct rsmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{
4871 predicate(UseSSE >= 1 && UseAVX == 0);
4872 match(Set dst (MulReductionVD src1 src2));
// tmp carries the running product; dst is also declared TEMP because
// pshufd writes it before the final multiply consumes tmp.
4873 effect(TEMP tmp, TEMP dst);
4874 format %{ "movdqu $tmp,$src1\n\t"
4875 "mulsd $tmp,$src2\n\t"
// Shuffle imm 0xE moves the upper double of src2 into the low lane of dst.
4876 "pshufd $dst,$src2,0xE\n\t"
4877 "mulsd $dst,$tmp\t! mul reduction2D" %}
4878 ins_encode %{
4879 __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
4880 __ mulsd($tmp$$XMMRegister, $src2$$XMMRegister);
4881 __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE);
4882 __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister);
4883 %}
4884 ins_pipe( pipe_slow );
4885 %}
4886
4887 instruct rvmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{
4888 predicate(UseAVX > 0);
4889 match(Set dst (MulReductionVD src1 src2));
4890 effect(TEMP tmp, TEMP tmp2);
4891 format %{ "vmulsd $tmp2,$src1,$src2\n\t"
4892 "pshufd $tmp,$src2,0xE\n\t"
4893 "vmulsd $dst,$tmp2,$tmp\t! mul reduction2D" %}
4894 ins_encode %{
4895 __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
4896 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
4897 __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4905 effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
4906 format %{ "vmulsd $tmp2,$src1,$src2\n\t"
4907 "pshufd $tmp,$src2,0xE\n\t"
4908 "vmulsd $tmp2,$tmp2,$tmp\n\t"
4909 "vextractf128 $tmp3,$src2\n\t"
4910 "vmulsd $tmp2,$tmp2,$tmp3\n\t"
4911 "pshufd $tmp,$tmp3,0xE\n\t"
4912 "vmulsd $dst,$tmp2,$tmp\t! mul reduction4D" %}
4913 ins_encode %{
4914 __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
4915 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
4916 __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4917 __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
4918 __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4919 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
4920 __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4921 %}
4922 ins_pipe( pipe_slow );
4923 %}
4924
// AVX-512 multiply reduction over an 8-element double vector (512-bit src2):
//   dst = src1 * product of src2[0..7].
// The low 128-bit lane is folded first (pshufd 0xE brings its upper double
// down); the remaining three 128-bit lanes are pulled out with
// vextractf64x2 into tmp3 and each lane's two doubles are folded into the
// running product kept in tmp2.
4925 instruct rvmul8D_reduction_reg(regD dst, regD src1, vecZ src2, regD tmp, regD tmp2, regD tmp3) %{
4926 predicate(UseAVX > 2);
4927 match(Set dst (MulReductionVD src1 src2));
4928 effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
4929 format %{ "vmulsd $tmp2,$src1,$src2\n\t"
4930 "pshufd $tmp,$src2,0xE\n\t"
4931 "vmulsd $tmp2,$tmp2,$tmp\n\t"
4932 "vextractf64x2 $tmp3,$src2, 0x1\n\t"
4933 "vmulsd $tmp2,$tmp2,$tmp3\n\t"
// Fixed: shuffle the just-extracted lane ($tmp3), not $src2, so the
// disassembly comment matches ins_encode (pshufd of tmp3 at this step),
// consistent with the 0x2 and 0x3 lane iterations below.
4934 "pshufd $tmp,$tmp3,0xE\n\t"
4935 "vmulsd $tmp2,$tmp2,$tmp\n\t"
4936 "vextractf64x2 $tmp3,$src2, 0x2\n\t"
4937 "vmulsd $tmp2,$tmp2,$tmp3\n\t"
4938 "pshufd $tmp,$tmp3,0xE\n\t"
4939 "vmulsd $tmp2,$tmp2,$tmp\n\t"
4940 "vextractf64x2 $tmp3,$src2, 0x3\n\t"
4941 "vmulsd $tmp2,$tmp2,$tmp3\n\t"
4942 "pshufd $tmp,$tmp3,0xE\n\t"
4943 "vmulsd $dst,$tmp2,$tmp\t! mul reduction8D" %}
4944 ins_encode %{
4945 __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
4946 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
4947 __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4948 __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
4949 __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4950 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
4951 __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4952 __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
4953 __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4954 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
4955 __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4956 __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
4957 __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
4958 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
4959 __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
4960 %}
4961 ins_pipe( pipe_slow );
4962 %}
4963
4964 // ====================VECTOR ARITHMETIC=======================================
4965
4966 // --------------------------------- ADD --------------------------------------
4967
4968 // Bytes vector add
// Packed byte add (AddVB) instruction selection.
// SSE forms are two-operand and destructive (dst += src); the *_reg forms
// are three-operand AVX (vpaddb) and the *_mem forms fold the load of the
// second operand.  As seen below: vecS/vecD/vecX forms pass vector_len 0,
// vecY (256-bit) forms pass 1 and require UseAVX > 1, vecZ (512-bit) forms
// pass 2 and require UseAVX > 2.

// 4 bytes in a vecS, in-place SSE paddb.
4969 instruct vadd4B(vecS dst, vecS src) %{
4970 predicate(n->as_Vector()->length() == 4);
4971 match(Set dst (AddVB dst src));
4972 format %{ "paddb $dst,$src\t! add packed4B" %}
4973 ins_encode %{
4974 __ paddb($dst$$XMMRegister, $src$$XMMRegister);
4975 %}
4976 ins_pipe( pipe_slow );
4977 %}

// 4 bytes, AVX three-operand form.
4979 instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{
4980 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4981 match(Set dst (AddVB src1 src2));
4982 format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %}
4983 ins_encode %{
4984 int vector_len = 0;
4985 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
4986 %}
4987 ins_pipe( pipe_slow );
4988 %}

// 8 bytes in a vecD, in-place SSE paddb.
4990 instruct vadd8B(vecD dst, vecD src) %{
4991 predicate(n->as_Vector()->length() == 8);
4992 match(Set dst (AddVB dst src));
4993 format %{ "paddb $dst,$src\t! add packed8B" %}
4994 ins_encode %{
4995 __ paddb($dst$$XMMRegister, $src$$XMMRegister);
4996 %}
4997 ins_pipe( pipe_slow );
4998 %}

// 8 bytes, AVX three-operand form.
5000 instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{
5001 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
5002 match(Set dst (AddVB src1 src2));
5003 format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %}
5004 ins_encode %{
5005 int vector_len = 0;
5006 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5007 %}
5008 ins_pipe( pipe_slow );
5009 %}

// 16 bytes in a vecX (full 128-bit register), in-place SSE paddb.
5011 instruct vadd16B(vecX dst, vecX src) %{
5012 predicate(n->as_Vector()->length() == 16);
5013 match(Set dst (AddVB dst src));
5014 format %{ "paddb $dst,$src\t! add packed16B" %}
5015 ins_encode %{
5016 __ paddb($dst$$XMMRegister, $src$$XMMRegister);
5017 %}
5018 ins_pipe( pipe_slow );
5019 %}

// 16 bytes, AVX three-operand form.
5021 instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
5022 predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
5023 match(Set dst (AddVB src1 src2));
5024 format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %}
5025 ins_encode %{
5026 int vector_len = 0;
5027 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5028 %}
5029 ins_pipe( pipe_slow );
5030 %}

// 16 bytes, AVX form with the second operand loaded from memory.
5032 instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{
5033 predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
5034 match(Set dst (AddVB src (LoadVector mem)));
5035 format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
5036 ins_encode %{
5037 int vector_len = 0;
5038 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5039 %}
5040 ins_pipe( pipe_slow );
5041 %}

// 32 bytes (256-bit vecY): requires AVX2 (UseAVX > 1) for 256-bit integer ops.
5043 instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{
5044 predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
5045 match(Set dst (AddVB src1 src2));
5046 format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
5047 ins_encode %{
5048 int vector_len = 1;
5049 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5050 %}
5051 ins_pipe( pipe_slow );
5052 %}

// 32 bytes, memory-operand form.
5054 instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{
5055 predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
5056 match(Set dst (AddVB src (LoadVector mem)));
5057 format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
5058 ins_encode %{
5059 int vector_len = 1;
5060 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5061 %}
5062 ins_pipe( pipe_slow );
5063 %}

// 64 bytes (512-bit vecZ): requires AVX-512 (UseAVX > 2).
5065 instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
5066 predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
5067 match(Set dst (AddVB src1 src2));
5068 format %{ "vpaddb $dst,$src1,$src2\t! add packed64B" %}
5069 ins_encode %{
5070 int vector_len = 2;
5071 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5072 %}
5073 ins_pipe( pipe_slow );
5074 %}

// 64 bytes, memory-operand form.
5076 instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{
5077 predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
5078 match(Set dst (AddVB src (LoadVector mem)));
5079 format %{ "vpaddb $dst,$src,$mem\t! add packed64B" %}
5080 ins_encode %{
5081 int vector_len = 2;
5082 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5083 %}
5084 ins_pipe( pipe_slow );
5085 %}
5086
5087 // Shorts/Chars vector add
// Packed short/char add (AddVS), 16-bit lanes via paddw/vpaddw.
// Same structure as the byte-add family: destructive SSE forms, AVX
// three-operand *_reg forms, load-folding *_mem forms; vector_len 0/1/2
// for 128/256/512-bit with UseAVX > 0/1/2 respectively.

// 2 shorts in a vecS, in-place SSE paddw.
5088 instruct vadd2S(vecS dst, vecS src) %{
5089 predicate(n->as_Vector()->length() == 2);
5090 match(Set dst (AddVS dst src));
5091 format %{ "paddw $dst,$src\t! add packed2S" %}
5092 ins_encode %{
5093 __ paddw($dst$$XMMRegister, $src$$XMMRegister);
5094 %}
5095 ins_pipe( pipe_slow );
5096 %}

// 2 shorts, AVX three-operand form.
5098 instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{
5099 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5100 match(Set dst (AddVS src1 src2));
5101 format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
5102 ins_encode %{
5103 int vector_len = 0;
5104 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5105 %}
5106 ins_pipe( pipe_slow );
5107 %}

// 4 shorts in a vecD, in-place SSE paddw.
5109 instruct vadd4S(vecD dst, vecD src) %{
5110 predicate(n->as_Vector()->length() == 4);
5111 match(Set dst (AddVS dst src));
5112 format %{ "paddw $dst,$src\t! add packed4S" %}
5113 ins_encode %{
5114 __ paddw($dst$$XMMRegister, $src$$XMMRegister);
5115 %}
5116 ins_pipe( pipe_slow );
5117 %}

// 4 shorts, AVX three-operand form.
5119 instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{
5120 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5121 match(Set dst (AddVS src1 src2));
5122 format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
5123 ins_encode %{
5124 int vector_len = 0;
5125 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5126 %}
5127 ins_pipe( pipe_slow );
5128 %}

// 8 shorts in a vecX, in-place SSE paddw.
5130 instruct vadd8S(vecX dst, vecX src) %{
5131 predicate(n->as_Vector()->length() == 8);
5132 match(Set dst (AddVS dst src));
5133 format %{ "paddw $dst,$src\t! add packed8S" %}
5134 ins_encode %{
5135 __ paddw($dst$$XMMRegister, $src$$XMMRegister);
5136 %}
5137 ins_pipe( pipe_slow );
5138 %}

// 8 shorts, AVX three-operand form.
5140 instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
5141 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
5142 match(Set dst (AddVS src1 src2));
5143 format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
5144 ins_encode %{
5145 int vector_len = 0;
5146 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5147 %}
5148 ins_pipe( pipe_slow );
5149 %}

// 8 shorts, memory-operand form.
5151 instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{
5152 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
5153 match(Set dst (AddVS src (LoadVector mem)));
5154 format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
5155 ins_encode %{
5156 int vector_len = 0;
5157 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5158 %}
5159 ins_pipe( pipe_slow );
5160 %}

// 16 shorts (256-bit): AVX2 required for 256-bit integer ops.
5162 instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{
5163 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
5164 match(Set dst (AddVS src1 src2));
5165 format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
5166 ins_encode %{
5167 int vector_len = 1;
5168 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5169 %}
5170 ins_pipe( pipe_slow );
5171 %}

// 16 shorts, memory-operand form.
5173 instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{
5174 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
5175 match(Set dst (AddVS src (LoadVector mem)));
5176 format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
5177 ins_encode %{
5178 int vector_len = 1;
5179 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5180 %}
5181 ins_pipe( pipe_slow );
5182 %}

// 32 shorts (512-bit): AVX-512 required.
5184 instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
5185 predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
5186 match(Set dst (AddVS src1 src2));
5187 format %{ "vpaddw $dst,$src1,$src2\t! add packed32S" %}
5188 ins_encode %{
5189 int vector_len = 2;
5190 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5191 %}
5192 ins_pipe( pipe_slow );
5193 %}

// 32 shorts, memory-operand form.
5195 instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{
5196 predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
5197 match(Set dst (AddVS src (LoadVector mem)));
5198 format %{ "vpaddw $dst,$src,$mem\t! add packed32S" %}
5199 ins_encode %{
5200 int vector_len = 2;
5201 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5202 %}
5203 ins_pipe( pipe_slow );
5204 %}
5205
5206 // Integers vector add
// Packed int add (AddVI), 32-bit lanes via paddd/vpaddd.
// Destructive SSE forms, AVX three-operand *_reg forms, load-folding *_mem
// forms; vector_len 0/1/2 for 128/256/512-bit with UseAVX > 0/1/2.

// 2 ints in a vecD, in-place SSE paddd.
5207 instruct vadd2I(vecD dst, vecD src) %{
5208 predicate(n->as_Vector()->length() == 2);
5209 match(Set dst (AddVI dst src));
5210 format %{ "paddd $dst,$src\t! add packed2I" %}
5211 ins_encode %{
5212 __ paddd($dst$$XMMRegister, $src$$XMMRegister);
5213 %}
5214 ins_pipe( pipe_slow );
5215 %}

// 2 ints, AVX three-operand form.
5217 instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
5218 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5219 match(Set dst (AddVI src1 src2));
5220 format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %}
5221 ins_encode %{
5222 int vector_len = 0;
5223 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5224 %}
5225 ins_pipe( pipe_slow );
5226 %}

// 4 ints in a vecX, in-place SSE paddd.
5228 instruct vadd4I(vecX dst, vecX src) %{
5229 predicate(n->as_Vector()->length() == 4);
5230 match(Set dst (AddVI dst src));
5231 format %{ "paddd $dst,$src\t! add packed4I" %}
5232 ins_encode %{
5233 __ paddd($dst$$XMMRegister, $src$$XMMRegister);
5234 %}
5235 ins_pipe( pipe_slow );
5236 %}

// 4 ints, AVX three-operand form.
5238 instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
5239 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5240 match(Set dst (AddVI src1 src2));
5241 format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %}
5242 ins_encode %{
5243 int vector_len = 0;
5244 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5245 %}
5246 ins_pipe( pipe_slow );
5247 %}

// 4 ints, memory-operand form.
5249 instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
5250 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5251 match(Set dst (AddVI src (LoadVector mem)));
5252 format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %}
5253 ins_encode %{
5254 int vector_len = 0;
5255 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5256 %}
5257 ins_pipe( pipe_slow );
5258 %}

// 8 ints (256-bit): AVX2 required.
5260 instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
5261 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
5262 match(Set dst (AddVI src1 src2));
5263 format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %}
5264 ins_encode %{
5265 int vector_len = 1;
5266 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5267 %}
5268 ins_pipe( pipe_slow );
5269 %}

// 8 ints, memory-operand form.
5271 instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
5272 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
5273 match(Set dst (AddVI src (LoadVector mem)));
5274 format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %}
5275 ins_encode %{
5276 int vector_len = 1;
5277 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5278 %}
5279 ins_pipe( pipe_slow );
5280 %}

// 16 ints (512-bit): AVX-512 required.
5282 instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
5283 predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
5284 match(Set dst (AddVI src1 src2));
5285 format %{ "vpaddd $dst,$src1,$src2\t! add packed16I" %}
5286 ins_encode %{
5287 int vector_len = 2;
5288 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5289 %}
5290 ins_pipe( pipe_slow );
5291 %}

// 16 ints, memory-operand form.
5293 instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{
5294 predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
5295 match(Set dst (AddVI src (LoadVector mem)));
5296 format %{ "vpaddd $dst,$src,$mem\t! add packed16I" %}
5297 ins_encode %{
5298 int vector_len = 2;
5299 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5300 %}
5301 ins_pipe( pipe_slow );
5302 %}
5303
5304 // Longs vector add
// Packed long add (AddVL), 64-bit lanes via paddq/vpaddq.
// Smallest form is 2 longs (a full 128-bit vecX); 256/512-bit forms gate on
// UseAVX > 1 / > 2 with vector_len 1 / 2.

// 2 longs in a vecX, in-place SSE paddq.
5305 instruct vadd2L(vecX dst, vecX src) %{
5306 predicate(n->as_Vector()->length() == 2);
5307 match(Set dst (AddVL dst src));
5308 format %{ "paddq $dst,$src\t! add packed2L" %}
5309 ins_encode %{
5310 __ paddq($dst$$XMMRegister, $src$$XMMRegister);
5311 %}
5312 ins_pipe( pipe_slow );
5313 %}

// 2 longs, AVX three-operand form.
5315 instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
5316 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5317 match(Set dst (AddVL src1 src2));
5318 format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %}
5319 ins_encode %{
5320 int vector_len = 0;
5321 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5322 %}
5323 ins_pipe( pipe_slow );
5324 %}

// 2 longs, memory-operand form.
5326 instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
5327 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5328 match(Set dst (AddVL src (LoadVector mem)));
5329 format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %}
5330 ins_encode %{
5331 int vector_len = 0;
5332 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5333 %}
5334 ins_pipe( pipe_slow );
5335 %}

// 4 longs (256-bit): AVX2 required.
5337 instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
5338 predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
5339 match(Set dst (AddVL src1 src2));
5340 format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %}
5341 ins_encode %{
5342 int vector_len = 1;
5343 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5344 %}
5345 ins_pipe( pipe_slow );
5346 %}

// 4 longs, memory-operand form.
5348 instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
5349 predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
5350 match(Set dst (AddVL src (LoadVector mem)));
5351 format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %}
5352 ins_encode %{
5353 int vector_len = 1;
5354 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5355 %}
5356 ins_pipe( pipe_slow );
5357 %}

// 8 longs (512-bit): AVX-512 required.
5359 instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
5360 predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
5361 match(Set dst (AddVL src1 src2));
5362 format %{ "vpaddq $dst,$src1,$src2\t! add packed8L" %}
5363 ins_encode %{
5364 int vector_len = 2;
5365 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5366 %}
5367 ins_pipe( pipe_slow );
5368 %}

// 8 longs, memory-operand form.
5370 instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{
5371 predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
5372 match(Set dst (AddVL src (LoadVector mem)));
5373 format %{ "vpaddq $dst,$src,$mem\t! add packed8L" %}
5374 ins_encode %{
5375 int vector_len = 2;
5376 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5377 %}
5378 ins_pipe( pipe_slow );
5379 %}
5380
5381 // Floats vector add
// Packed float add (AddVF) via addps/vaddps.
// Unlike the integer families above, the 256-bit forms here require only
// UseAVX > 0 (AVX1 provides 256-bit floating-point ops); 512-bit forms
// still require UseAVX > 2.

// 2 floats in a vecD, in-place SSE addps.
5382 instruct vadd2F(vecD dst, vecD src) %{
5383 predicate(n->as_Vector()->length() == 2);
5384 match(Set dst (AddVF dst src));
5385 format %{ "addps $dst,$src\t! add packed2F" %}
5386 ins_encode %{
5387 __ addps($dst$$XMMRegister, $src$$XMMRegister);
5388 %}
5389 ins_pipe( pipe_slow );
5390 %}

// 2 floats, AVX three-operand form.
5392 instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
5393 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5394 match(Set dst (AddVF src1 src2));
5395 format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %}
5396 ins_encode %{
5397 int vector_len = 0;
5398 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5399 %}
5400 ins_pipe( pipe_slow );
5401 %}

// 4 floats in a vecX, in-place SSE addps.
5403 instruct vadd4F(vecX dst, vecX src) %{
5404 predicate(n->as_Vector()->length() == 4);
5405 match(Set dst (AddVF dst src));
5406 format %{ "addps $dst,$src\t! add packed4F" %}
5407 ins_encode %{
5408 __ addps($dst$$XMMRegister, $src$$XMMRegister);
5409 %}
5410 ins_pipe( pipe_slow );
5411 %}

// 4 floats, AVX three-operand form.
5413 instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
5414 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5415 match(Set dst (AddVF src1 src2));
5416 format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %}
5417 ins_encode %{
5418 int vector_len = 0;
5419 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5420 %}
5421 ins_pipe( pipe_slow );
5422 %}

// 4 floats, memory-operand form.
5424 instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
5425 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5426 match(Set dst (AddVF src (LoadVector mem)));
5427 format %{ "vaddps $dst,$src,$mem\t! add packed4F" %}
5428 ins_encode %{
5429 int vector_len = 0;
5430 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5431 %}
5432 ins_pipe( pipe_slow );
5433 %}

// 8 floats (256-bit): AVX1 suffices for 256-bit FP.
5435 instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
5436 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
5437 match(Set dst (AddVF src1 src2));
5438 format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %}
5439 ins_encode %{
5440 int vector_len = 1;
5441 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5442 %}
5443 ins_pipe( pipe_slow );
5444 %}

// 8 floats, memory-operand form.
5446 instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
5447 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
5448 match(Set dst (AddVF src (LoadVector mem)));
5449 format %{ "vaddps $dst,$src,$mem\t! add packed8F" %}
5450 ins_encode %{
5451 int vector_len = 1;
5452 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5453 %}
5454 ins_pipe( pipe_slow );
5455 %}

// 16 floats (512-bit): AVX-512 required.
5457 instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
5458 predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
5459 match(Set dst (AddVF src1 src2));
5460 format %{ "vaddps $dst,$src1,$src2\t! add packed16F" %}
5461 ins_encode %{
5462 int vector_len = 2;
5463 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5464 %}
5465 ins_pipe( pipe_slow );
5466 %}

// 16 floats, memory-operand form.
5468 instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{
5469 predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
5470 match(Set dst (AddVF src (LoadVector mem)));
5471 format %{ "vaddps $dst,$src,$mem\t! add packed16F" %}
5472 ins_encode %{
5473 int vector_len = 2;
5474 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5475 %}
5476 ins_pipe( pipe_slow );
5477 %}
5478
5479 // Doubles vector add
// Packed double add (AddVD) via addpd/vaddpd.
// As with floats, the 256-bit forms need only UseAVX > 0 (AVX1 FP);
// 512-bit forms require UseAVX > 2.

// 2 doubles in a vecX, in-place SSE addpd.
5480 instruct vadd2D(vecX dst, vecX src) %{
5481 predicate(n->as_Vector()->length() == 2);
5482 match(Set dst (AddVD dst src));
5483 format %{ "addpd $dst,$src\t! add packed2D" %}
5484 ins_encode %{
5485 __ addpd($dst$$XMMRegister, $src$$XMMRegister);
5486 %}
5487 ins_pipe( pipe_slow );
5488 %}

// 2 doubles, AVX three-operand form.
5490 instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
5491 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5492 match(Set dst (AddVD src1 src2));
5493 format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %}
5494 ins_encode %{
5495 int vector_len = 0;
5496 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5497 %}
5498 ins_pipe( pipe_slow );
5499 %}

// 2 doubles, memory-operand form.
5501 instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
5502 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5503 match(Set dst (AddVD src (LoadVector mem)));
5504 format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %}
5505 ins_encode %{
5506 int vector_len = 0;
5507 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5508 %}
5509 ins_pipe( pipe_slow );
5510 %}

// 4 doubles (256-bit): AVX1 suffices for 256-bit FP.
5512 instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
5513 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5514 match(Set dst (AddVD src1 src2));
5515 format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %}
5516 ins_encode %{
5517 int vector_len = 1;
5518 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5519 %}
5520 ins_pipe( pipe_slow );
5521 %}

// 4 doubles, memory-operand form.
5523 instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
5524 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5525 match(Set dst (AddVD src (LoadVector mem)));
5526 format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %}
5527 ins_encode %{
5528 int vector_len = 1;
5529 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5530 %}
5531 ins_pipe( pipe_slow );
5532 %}

// 8 doubles (512-bit): AVX-512 required.
5534 instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
5535 predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
5536 match(Set dst (AddVD src1 src2));
5537 format %{ "vaddpd $dst,$src1,$src2\t! add packed8D" %}
5538 ins_encode %{
5539 int vector_len = 2;
5540 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5541 %}
5542 ins_pipe( pipe_slow );
5543 %}

// 8 doubles, memory-operand form.
5545 instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{
5546 predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
5547 match(Set dst (AddVD src (LoadVector mem)));
5548 format %{ "vaddpd $dst,$src,$mem\t! add packed8D" %}
5549 ins_encode %{
5550 int vector_len = 2;
5551 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5552 %}
5553 ins_pipe( pipe_slow );
5554 %}
5555
5556 // --------------------------------- SUB --------------------------------------
5557
5558 // Bytes vector sub
// Packed byte subtract (SubVB) via psubb/vpsubb.
// Mirrors the AddVB family exactly: destructive SSE forms, AVX
// three-operand *_reg forms, load-folding *_mem forms; vector_len 0/1/2
// with UseAVX > 0/1/2 gating.  Note operand order: dst/src1 minus src2.

// 4 bytes in a vecS, in-place SSE psubb.
5559 instruct vsub4B(vecS dst, vecS src) %{
5560 predicate(n->as_Vector()->length() == 4);
5561 match(Set dst (SubVB dst src));
5562 format %{ "psubb $dst,$src\t! sub packed4B" %}
5563 ins_encode %{
5564 __ psubb($dst$$XMMRegister, $src$$XMMRegister);
5565 %}
5566 ins_pipe( pipe_slow );
5567 %}

// 4 bytes, AVX three-operand form.
5569 instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{
5570 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5571 match(Set dst (SubVB src1 src2));
5572 format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
5573 ins_encode %{
5574 int vector_len = 0;
5575 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5576 %}
5577 ins_pipe( pipe_slow );
5578 %}

// 8 bytes in a vecD, in-place SSE psubb.
5580 instruct vsub8B(vecD dst, vecD src) %{
5581 predicate(n->as_Vector()->length() == 8);
5582 match(Set dst (SubVB dst src));
5583 format %{ "psubb $dst,$src\t! sub packed8B" %}
5584 ins_encode %{
5585 __ psubb($dst$$XMMRegister, $src$$XMMRegister);
5586 %}
5587 ins_pipe( pipe_slow );
5588 %}

// 8 bytes, AVX three-operand form.
5590 instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
5591 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
5592 match(Set dst (SubVB src1 src2));
5593 format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
5594 ins_encode %{
5595 int vector_len = 0;
5596 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5597 %}
5598 ins_pipe( pipe_slow );
5599 %}

// 16 bytes in a vecX, in-place SSE psubb.
5601 instruct vsub16B(vecX dst, vecX src) %{
5602 predicate(n->as_Vector()->length() == 16);
5603 match(Set dst (SubVB dst src));
5604 format %{ "psubb $dst,$src\t! sub packed16B" %}
5605 ins_encode %{
5606 __ psubb($dst$$XMMRegister, $src$$XMMRegister);
5607 %}
5608 ins_pipe( pipe_slow );
5609 %}

// 16 bytes, AVX three-operand form.
5611 instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
5612 predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
5613 match(Set dst (SubVB src1 src2));
5614 format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
5615 ins_encode %{
5616 int vector_len = 0;
5617 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5618 %}
5619 ins_pipe( pipe_slow );
5620 %}

// 16 bytes, memory-operand form.
5622 instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{
5623 predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
5624 match(Set dst (SubVB src (LoadVector mem)));
5625 format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
5626 ins_encode %{
5627 int vector_len = 0;
5628 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5629 %}
5630 ins_pipe( pipe_slow );
5631 %}

// 32 bytes (256-bit): AVX2 required for 256-bit integer ops.
5633 instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{
5634 predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
5635 match(Set dst (SubVB src1 src2));
5636 format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
5637 ins_encode %{
5638 int vector_len = 1;
5639 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5640 %}
5641 ins_pipe( pipe_slow );
5642 %}

// 32 bytes, memory-operand form.
5644 instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{
5645 predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
5646 match(Set dst (SubVB src (LoadVector mem)));
5647 format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
5648 ins_encode %{
5649 int vector_len = 1;
5650 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5651 %}
5652 ins_pipe( pipe_slow );
5653 %}

// 64 bytes (512-bit): AVX-512 required.
5655 instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
5656 predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
5657 match(Set dst (SubVB src1 src2));
5658 format %{ "vpsubb $dst,$src1,$src2\t! sub packed64B" %}
5659 ins_encode %{
5660 int vector_len = 2;
5661 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5662 %}
5663 ins_pipe( pipe_slow );
5664 %}

// 64 bytes, memory-operand form.
5666 instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{
5667 predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
5668 match(Set dst (SubVB src (LoadVector mem)));
5669 format %{ "vpsubb $dst,$src,$mem\t! sub packed64B" %}
5670 ins_encode %{
5671 int vector_len = 2;
5672 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5673 %}
5674 ins_pipe( pipe_slow );
5675 %}
5676
5677 // Shorts/Chars vector sub
// Packed short/char subtract (SubVS) via psubw/vpsubw.
// Same layout as the AddVS family: destructive SSE forms, AVX three-operand
// *_reg forms, load-folding *_mem forms; vector_len 0/1/2 with
// UseAVX > 0/1/2 gating.

// 2 shorts in a vecS, in-place SSE psubw.
5678 instruct vsub2S(vecS dst, vecS src) %{
5679 predicate(n->as_Vector()->length() == 2);
5680 match(Set dst (SubVS dst src));
5681 format %{ "psubw $dst,$src\t! sub packed2S" %}
5682 ins_encode %{
5683 __ psubw($dst$$XMMRegister, $src$$XMMRegister);
5684 %}
5685 ins_pipe( pipe_slow );
5686 %}

// 2 shorts, AVX three-operand form.
5688 instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{
5689 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5690 match(Set dst (SubVS src1 src2));
5691 format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
5692 ins_encode %{
5693 int vector_len = 0;
5694 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5695 %}
5696 ins_pipe( pipe_slow );
5697 %}

// 4 shorts in a vecD, in-place SSE psubw.
5699 instruct vsub4S(vecD dst, vecD src) %{
5700 predicate(n->as_Vector()->length() == 4);
5701 match(Set dst (SubVS dst src));
5702 format %{ "psubw $dst,$src\t! sub packed4S" %}
5703 ins_encode %{
5704 __ psubw($dst$$XMMRegister, $src$$XMMRegister);
5705 %}
5706 ins_pipe( pipe_slow );
5707 %}

// 4 shorts, AVX three-operand form.
5709 instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
5710 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5711 match(Set dst (SubVS src1 src2));
5712 format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
5713 ins_encode %{
5714 int vector_len = 0;
5715 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5716 %}
5717 ins_pipe( pipe_slow );
5718 %}

// 8 shorts in a vecX, in-place SSE psubw.
5720 instruct vsub8S(vecX dst, vecX src) %{
5721 predicate(n->as_Vector()->length() == 8);
5722 match(Set dst (SubVS dst src));
5723 format %{ "psubw $dst,$src\t! sub packed8S" %}
5724 ins_encode %{
5725 __ psubw($dst$$XMMRegister, $src$$XMMRegister);
5726 %}
5727 ins_pipe( pipe_slow );
5728 %}

// 8 shorts, AVX three-operand form.
5730 instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
5731 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
5732 match(Set dst (SubVS src1 src2));
5733 format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
5734 ins_encode %{
5735 int vector_len = 0;
5736 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5737 %}
5738 ins_pipe( pipe_slow );
5739 %}

// 8 shorts, memory-operand form.
5741 instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{
5742 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
5743 match(Set dst (SubVS src (LoadVector mem)));
5744 format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
5745 ins_encode %{
5746 int vector_len = 0;
5747 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5748 %}
5749 ins_pipe( pipe_slow );
5750 %}

// 16 shorts (256-bit): AVX2 required.
5752 instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{
5753 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
5754 match(Set dst (SubVS src1 src2));
5755 format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
5756 ins_encode %{
5757 int vector_len = 1;
5758 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5759 %}
5760 ins_pipe( pipe_slow );
5761 %}

// 16 shorts, memory-operand form.
5763 instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{
5764 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
5765 match(Set dst (SubVS src (LoadVector mem)));
5766 format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
5767 ins_encode %{
5768 int vector_len = 1;
5769 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5770 %}
5771 ins_pipe( pipe_slow );
5772 %}

// 32 shorts (512-bit): AVX-512 required.
5774 instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
5775 predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
5776 match(Set dst (SubVS src1 src2));
5777 format %{ "vpsubw $dst,$src1,$src2\t! sub packed32S" %}
5778 ins_encode %{
5779 int vector_len = 2;
5780 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5781 %}
5782 ins_pipe( pipe_slow );
5783 %}
5784
5785 instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{
5786 predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
5787 match(Set dst (SubVS src (LoadVector mem)));
5788 format %{ "vpsubw $dst,$src,$mem\t! sub packed32S" %}
5789 ins_encode %{
5790 int vector_len = 2;
5791 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5792 %}
5793 ins_pipe( pipe_slow );
5794 %}
5795
5796 // Integers vector sub
5797 instruct vsub2I(vecD dst, vecD src) %{
5798 predicate(n->as_Vector()->length() == 2);
5799 match(Set dst (SubVI dst src));
5800 format %{ "psubd $dst,$src\t! sub packed2I" %}
5801 ins_encode %{
5802 __ psubd($dst$$XMMRegister, $src$$XMMRegister);
5803 %}
5804 ins_pipe( pipe_slow );
5805 %}
5806
5807 instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
5808 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5809 match(Set dst (SubVI src1 src2));
5810 format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %}
5811 ins_encode %{
5812 int vector_len = 0;
5813 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5814 %}
5815 ins_pipe( pipe_slow );
5816 %}
5817
5818 instruct vsub4I(vecX dst, vecX src) %{
5819 predicate(n->as_Vector()->length() == 4);
5820 match(Set dst (SubVI dst src));
5821 format %{ "psubd $dst,$src\t! sub packed4I" %}
5822 ins_encode %{
5823 __ psubd($dst$$XMMRegister, $src$$XMMRegister);
5824 %}
5825 ins_pipe( pipe_slow );
5826 %}
5827
5828 instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
5829 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5830 match(Set dst (SubVI src1 src2));
5831 format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %}
5832 ins_encode %{
5833 int vector_len = 0;
5834 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5835 %}
5836 ins_pipe( pipe_slow );
5837 %}
5838
5839 instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
5840 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
5841 match(Set dst (SubVI src (LoadVector mem)));
5842 format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %}
5843 ins_encode %{
5844 int vector_len = 0;
5845 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5846 %}
5847 ins_pipe( pipe_slow );
5848 %}
5849
5850 instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
5851 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
5852 match(Set dst (SubVI src1 src2));
5853 format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %}
5854 ins_encode %{
5855 int vector_len = 1;
5856 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5857 %}
5858 ins_pipe( pipe_slow );
5859 %}
5860
5861 instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
5862 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
5863 match(Set dst (SubVI src (LoadVector mem)));
5864 format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %}
5865 ins_encode %{
5866 int vector_len = 1;
5867 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5868 %}
5869 ins_pipe( pipe_slow );
5870 %}
5871
5872 instruct vsub16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
5873 predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
5874 match(Set dst (SubVI src1 src2));
5875 format %{ "vpsubd $dst,$src1,$src2\t! sub packed16I" %}
5876 ins_encode %{
5877 int vector_len = 2;
5878 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5879 %}
5880 ins_pipe( pipe_slow );
5881 %}
5882
5883 instruct vsub16I_mem(vecZ dst, vecZ src, memory mem) %{
5884 predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
5885 match(Set dst (SubVI src (LoadVector mem)));
5886 format %{ "vpsubd $dst,$src,$mem\t! sub packed16I" %}
5887 ins_encode %{
5888 int vector_len = 2;
5889 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5890 %}
5891 ins_pipe( pipe_slow );
5892 %}
5893
5894 // Longs vector sub
5895 instruct vsub2L(vecX dst, vecX src) %{
5896 predicate(n->as_Vector()->length() == 2);
5897 match(Set dst (SubVL dst src));
5898 format %{ "psubq $dst,$src\t! sub packed2L" %}
5899 ins_encode %{
5900 __ psubq($dst$$XMMRegister, $src$$XMMRegister);
5901 %}
5902 ins_pipe( pipe_slow );
5903 %}
5904
5905 instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
5906 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5907 match(Set dst (SubVL src1 src2));
5908 format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %}
5909 ins_encode %{
5910 int vector_len = 0;
5911 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5912 %}
5913 ins_pipe( pipe_slow );
5914 %}
5915
5916 instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
5917 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5918 match(Set dst (SubVL src (LoadVector mem)));
5919 format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %}
5920 ins_encode %{
5921 int vector_len = 0;
5922 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5923 %}
5924 ins_pipe( pipe_slow );
5925 %}
5926
5927 instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
5928 predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
5929 match(Set dst (SubVL src1 src2));
5930 format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %}
5931 ins_encode %{
5932 int vector_len = 1;
5933 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5934 %}
5935 ins_pipe( pipe_slow );
5936 %}
5937
5938 instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
5939 predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
5940 match(Set dst (SubVL src (LoadVector mem)));
5941 format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %}
5942 ins_encode %{
5943 int vector_len = 1;
5944 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5945 %}
5946 ins_pipe( pipe_slow );
5947 %}
5948
5949 instruct vsub8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
5950 predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
5951 match(Set dst (SubVL src1 src2));
5952 format %{ "vpsubq $dst,$src1,$src2\t! sub packed8L" %}
5953 ins_encode %{
5954 int vector_len = 2;
5955 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5956 %}
5957 ins_pipe( pipe_slow );
5958 %}
5959
5960 instruct vsub8L_mem(vecZ dst, vecZ src, memory mem) %{
5961 predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
5962 match(Set dst (SubVL src (LoadVector mem)));
5963 format %{ "vpsubq $dst,$src,$mem\t! sub packed8L" %}
5964 ins_encode %{
5965 int vector_len = 2;
5966 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
5967 %}
5968 ins_pipe( pipe_slow );
5969 %}
5970
5971 // Floats vector sub
5972 instruct vsub2F(vecD dst, vecD src) %{
5973 predicate(n->as_Vector()->length() == 2);
5974 match(Set dst (SubVF dst src));
5975 format %{ "subps $dst,$src\t! sub packed2F" %}
5976 ins_encode %{
5977 __ subps($dst$$XMMRegister, $src$$XMMRegister);
5978 %}
5979 ins_pipe( pipe_slow );
5980 %}
5981
5982 instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
5983 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
5984 match(Set dst (SubVF src1 src2));
5985 format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %}
5986 ins_encode %{
5987 int vector_len = 0;
5988 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
5989 %}
5990 ins_pipe( pipe_slow );
5991 %}
5992
5993 instruct vsub4F(vecX dst, vecX src) %{
5994 predicate(n->as_Vector()->length() == 4);
5995 match(Set dst (SubVF dst src));
5996 format %{ "subps $dst,$src\t! sub packed4F" %}
5997 ins_encode %{
5998 __ subps($dst$$XMMRegister, $src$$XMMRegister);
5999 %}
6000 ins_pipe( pipe_slow );
6001 %}
6002
6003 instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
6004 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6005 match(Set dst (SubVF src1 src2));
6006 format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %}
6007 ins_encode %{
6008 int vector_len = 0;
6009 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6010 %}
6011 ins_pipe( pipe_slow );
6012 %}
6013
6014 instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
6015 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6016 match(Set dst (SubVF src (LoadVector mem)));
6017 format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %}
6018 ins_encode %{
6019 int vector_len = 0;
6020 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6021 %}
6022 ins_pipe( pipe_slow );
6023 %}
6024
6025 instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{
6026 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
6027 match(Set dst (SubVF src1 src2));
6028 format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %}
6029 ins_encode %{
6030 int vector_len = 1;
6031 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6032 %}
6033 ins_pipe( pipe_slow );
6034 %}
6035
6036 instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{
6037 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
6038 match(Set dst (SubVF src (LoadVector mem)));
6039 format %{ "vsubps $dst,$src,$mem\t! sub packed8F" %}
6040 ins_encode %{
6041 int vector_len = 1;
6042 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6043 %}
6044 ins_pipe( pipe_slow );
6045 %}
6046
6047 instruct vsub16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
6048 predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
6049 match(Set dst (SubVF src1 src2));
6050 format %{ "vsubps $dst,$src1,$src2\t! sub packed16F" %}
6051 ins_encode %{
6052 int vector_len = 2;
6053 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6054 %}
6055 ins_pipe( pipe_slow );
6056 %}
6057
6058 instruct vsub16F_mem(vecZ dst, vecZ src, memory mem) %{
6059 predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
6060 match(Set dst (SubVF src (LoadVector mem)));
6061 format %{ "vsubps $dst,$src,$mem\t! sub packed16F" %}
6062 ins_encode %{
6063 int vector_len = 2;
6064 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6065 %}
6066 ins_pipe( pipe_slow );
6067 %}
6068
6069 // Doubles vector sub
6070 instruct vsub2D(vecX dst, vecX src) %{
6071 predicate(n->as_Vector()->length() == 2);
6072 match(Set dst (SubVD dst src));
6073 format %{ "subpd $dst,$src\t! sub packed2D" %}
6074 ins_encode %{
6075 __ subpd($dst$$XMMRegister, $src$$XMMRegister);
6076 %}
6077 ins_pipe( pipe_slow );
6078 %}
6079
6080 instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
6081 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6082 match(Set dst (SubVD src1 src2));
6083 format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %}
6084 ins_encode %{
6085 int vector_len = 0;
6086 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6087 %}
6088 ins_pipe( pipe_slow );
6089 %}
6090
6091 instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
6092 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6093 match(Set dst (SubVD src (LoadVector mem)));
6094 format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %}
6095 ins_encode %{
6096 int vector_len = 0;
6097 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6098 %}
6099 ins_pipe( pipe_slow );
6100 %}
6101
6102 instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
6103 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6104 match(Set dst (SubVD src1 src2));
6105 format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %}
6106 ins_encode %{
6107 int vector_len = 1;
6108 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6109 %}
6110 ins_pipe( pipe_slow );
6111 %}
6112
6113 instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
6114 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6115 match(Set dst (SubVD src (LoadVector mem)));
6116 format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %}
6117 ins_encode %{
6118 int vector_len = 1;
6119 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6120 %}
6121 ins_pipe( pipe_slow );
6122 %}
6123
6124 instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
6125 predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
6126 match(Set dst (SubVD src1 src2));
6127 format %{ "vsubpd $dst,$src1,$src2\t! sub packed8D" %}
6128 ins_encode %{
6129 int vector_len = 2;
6130 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6131 %}
6132 ins_pipe( pipe_slow );
6133 %}
6134
6135 instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{
6136 predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
6137 match(Set dst (SubVD src (LoadVector mem)));
6138 format %{ "vsubpd $dst,$src,$mem\t! sub packed8D" %}
6139 ins_encode %{
6140 int vector_len = 2;
6141 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6142 %}
6143 ins_pipe( pipe_slow );
6144 %}
6145
6146 // --------------------------------- MUL --------------------------------------
6147
// Same naming/width conventions as the SUB rules: plain forms are
// destructive SSE ops (dst *= src), *_reg are three-operand AVX ops,
// *_mem fold a LoadVector, and vector_len 0/1/2 = 128/256/512-bit.
6148 // Shorts/Chars vector mul
6149 instruct vmul2S(vecS dst, vecS src) %{
6150 predicate(n->as_Vector()->length() == 2);
6151 match(Set dst (MulVS dst src));
6152 format %{ "pmullw $dst,$src\t! mul packed2S" %}
6153 ins_encode %{
6154 __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
6155 %}
6156 ins_pipe( pipe_slow );
6157 %}
6158
6159 instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{
6160 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6161 match(Set dst (MulVS src1 src2));
6162 format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
6163 ins_encode %{
6164 int vector_len = 0;
6165 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6166 %}
6167 ins_pipe( pipe_slow );
6168 %}
6169
6170 instruct vmul4S(vecD dst, vecD src) %{
6171 predicate(n->as_Vector()->length() == 4);
6172 match(Set dst (MulVS dst src));
6173 format %{ "pmullw $dst,$src\t! mul packed4S" %}
6174 ins_encode %{
6175 __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
6176 %}
6177 ins_pipe( pipe_slow );
6178 %}
6179
6180 instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{
6181 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6182 match(Set dst (MulVS src1 src2));
6183 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
6184 ins_encode %{
6185 int vector_len = 0;
6186 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6187 %}
6188 ins_pipe( pipe_slow );
6189 %}
6190
6191 instruct vmul8S(vecX dst, vecX src) %{
6192 predicate(n->as_Vector()->length() == 8);
6193 match(Set dst (MulVS dst src));
6194 format %{ "pmullw $dst,$src\t! mul packed8S" %}
6195 ins_encode %{
6196 __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
6197 %}
6198 ins_pipe( pipe_slow );
6199 %}
6200
6201 instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{
6202 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
6203 match(Set dst (MulVS src1 src2));
6204 format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
6205 ins_encode %{
6206 int vector_len = 0;
6207 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6208 %}
6209 ins_pipe( pipe_slow );
6210 %}
6211
6212 instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{
6213 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
6214 match(Set dst (MulVS src (LoadVector mem)));
6215 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
6216 ins_encode %{
6217 int vector_len = 0;
6218 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6219 %}
6220 ins_pipe( pipe_slow );
6221 %}
6222
6223 instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{
6224 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
6225 match(Set dst (MulVS src1 src2));
6226 format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
6227 ins_encode %{
6228 int vector_len = 1;
6229 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6230 %}
6231 ins_pipe( pipe_slow );
6232 %}
6233
6234 instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{
6235 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
6236 match(Set dst (MulVS src (LoadVector mem)));
6237 format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
6238 ins_encode %{
6239 int vector_len = 1;
6240 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6241 %}
6242 ins_pipe( pipe_slow );
6243 %}
6244
6245 instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
6246 predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
6247 match(Set dst (MulVS src1 src2));
6248 format %{ "vpmullw $dst,$src1,$src2\t! mul packed32S" %}
6249 ins_encode %{
6250 int vector_len = 2;
6251 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6252 %}
6253 ins_pipe( pipe_slow );
6254 %}
6255
6256 instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{
6257 predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
6258 match(Set dst (MulVS src (LoadVector mem)));
6259 format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %}
6260 ins_encode %{
6261 int vector_len = 2;
6262 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6263 %}
6264 ins_pipe( pipe_slow );
6265 %}
6266
// Integer element multiply uses pmulld, hence the UseSSE > 3 (SSE4.1)
// guard on the non-AVX forms below; plain pmullw forms above need no
// such guard. Long element multiply (vpmullq) additionally requires
// VM_Version::supports_avx512dq() — see its predicates.
6267 // Integers vector mul (sse4_1)
6268 instruct vmul2I(vecD dst, vecD src) %{
6269 predicate(UseSSE > 3 && n->as_Vector()->length() == 2);
6270 match(Set dst (MulVI dst src));
6271 format %{ "pmulld $dst,$src\t! mul packed2I" %}
6272 ins_encode %{
6273 __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
6274 %}
6275 ins_pipe( pipe_slow );
6276 %}
6277
6278 instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
6279 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6280 match(Set dst (MulVI src1 src2));
6281 format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %}
6282 ins_encode %{
6283 int vector_len = 0;
6284 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6285 %}
6286 ins_pipe( pipe_slow );
6287 %}
6288
6289 instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{
6290 predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq());
6291 match(Set dst (MulVL src1 src2));
6292 format %{ "vpmullq $dst,$src1,$src2\t! mul packed2L" %}
6293 ins_encode %{
6294 int vector_len = 0;
6295 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6296 %}
6297 ins_pipe( pipe_slow );
6298 %}
6299
6300 instruct vmul4I(vecX dst, vecX src) %{
6301 predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
6302 match(Set dst (MulVI dst src));
6303 format %{ "pmulld $dst,$src\t! mul packed4I" %}
6304 ins_encode %{
6305 __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
6306 %}
6307 ins_pipe( pipe_slow );
6308 %}
6309
6310 instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
6311 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6312 match(Set dst (MulVI src1 src2));
6313 format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %}
6314 ins_encode %{
6315 int vector_len = 0;
6316 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6317 %}
6318 ins_pipe( pipe_slow );
6319 %}
6320
6321 instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{
6322 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6323 match(Set dst (MulVI src (LoadVector mem)));
6324 format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %}
6325 ins_encode %{
6326 int vector_len = 0;
6327 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6328 %}
6329 ins_pipe( pipe_slow );
6330 %}
6331
6332 instruct vmul4L_reg(vecY dst, vecY src1, vecY src2) %{
6333 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq());
6334 match(Set dst (MulVL src1 src2));
6335 format %{ "vpmullq $dst,$src1,$src2\t! mul packed4L" %}
6336 ins_encode %{
6337 int vector_len = 1;
6338 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6339 %}
6340 ins_pipe( pipe_slow );
6341 %}
6342
6343 instruct vmul4L_mem(vecY dst, vecY src, memory mem) %{
6344 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq());
6345 match(Set dst (MulVL src (LoadVector mem)));
6346 format %{ "vpmullq $dst,$src,$mem\t! mul packed4L" %}
6347 ins_encode %{
6348 int vector_len = 1;
6349 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6350 %}
6351 ins_pipe( pipe_slow );
6352 %}
6353
6354 instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{
6355 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
6356 match(Set dst (MulVI src1 src2));
6357 format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %}
6358 ins_encode %{
6359 int vector_len = 1;
6360 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6361 %}
6362 ins_pipe( pipe_slow );
6363 %}
6364
6365 instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
6366 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq());
6367 match(Set dst (MulVL src1 src2));
6368 format %{ "vpmullq $dst,$src1,$src2\t! mul packed8L" %}
6369 ins_encode %{
6370 int vector_len = 2;
6371 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6372 %}
6373 ins_pipe( pipe_slow );
6374 %}
6375
6376 instruct vmul16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
6377 predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
6378 match(Set dst (MulVI src1 src2));
6379 format %{ "vpmulld $dst,$src1,$src2\t! mul packed16I" %}
6380 ins_encode %{
6381 int vector_len = 2;
6382 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6383 %}
6384 ins_pipe( pipe_slow );
6385 %}
6386
// NOTE(review): the 8I/8L/16I *_mem forms below sit after the 16I reg form,
// unlike the reg-then-mem ordering used elsewhere in this file; ordering
// appears immaterial to rule selection here — confirm against ADLC docs.
6387 instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{
6388 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
6389 match(Set dst (MulVI src (LoadVector mem)));
6390 format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %}
6391 ins_encode %{
6392 int vector_len = 1;
6393 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6394 %}
6395 ins_pipe( pipe_slow );
6396 %}
6397
6398 instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{
6399 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq());
6400 match(Set dst (MulVL src (LoadVector mem)));
6401 format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %}
6402 ins_encode %{
6403 int vector_len = 2;
6404 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6405 %}
6406 ins_pipe( pipe_slow );
6407 %}
6408
6409 instruct vmul16I_mem(vecZ dst, vecZ src, memory mem) %{
6410 predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
6411 match(Set dst (MulVI src (LoadVector mem)));
6412 format %{ "vpmulld $dst,$src,$mem\t! mul packed16I" %}
6413 ins_encode %{
6414 int vector_len = 2;
6415 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6416 %}
6417 ins_pipe( pipe_slow );
6418 %}
6419
// Floating-point multiply rules. Same conventions as the integer rules:
// plain forms are destructive SSE ops (dst *= src), *_reg are three-operand
// AVX ops, *_mem fold a LoadVector; vector_len 0/1/2 = 128/256/512-bit.
// 256-bit FP forms only need AVX1 (UseAVX > 0), unlike the 256-bit integer
// forms which require AVX2.
6420 // Floats vector mul
6421 instruct vmul2F(vecD dst, vecD src) %{
6422 predicate(n->as_Vector()->length() == 2);
6423 match(Set dst (MulVF dst src));
6424 format %{ "mulps $dst,$src\t! mul packed2F" %}
6425 ins_encode %{
6426 __ mulps($dst$$XMMRegister, $src$$XMMRegister);
6427 %}
6428 ins_pipe( pipe_slow );
6429 %}
6430
6431 instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
6432 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6433 match(Set dst (MulVF src1 src2));
6434 format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %}
6435 ins_encode %{
6436 int vector_len = 0;
6437 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6438 %}
6439 ins_pipe( pipe_slow );
6440 %}
6441
6442 instruct vmul4F(vecX dst, vecX src) %{
6443 predicate(n->as_Vector()->length() == 4);
6444 match(Set dst (MulVF dst src));
6445 format %{ "mulps $dst,$src\t! mul packed4F" %}
6446 ins_encode %{
6447 __ mulps($dst$$XMMRegister, $src$$XMMRegister);
6448 %}
6449 ins_pipe( pipe_slow );
6450 %}
6451
6452 instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
6453 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6454 match(Set dst (MulVF src1 src2));
6455 format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %}
6456 ins_encode %{
6457 int vector_len = 0;
6458 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6459 %}
6460 ins_pipe( pipe_slow );
6461 %}
6462
6463 instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{
6464 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6465 match(Set dst (MulVF src (LoadVector mem)));
6466 format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %}
6467 ins_encode %{
6468 int vector_len = 0;
6469 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6470 %}
6471 ins_pipe( pipe_slow );
6472 %}
6473
6474 instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{
6475 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
6476 match(Set dst (MulVF src1 src2));
6477 format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %}
6478 ins_encode %{
6479 int vector_len = 1;
6480 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6481 %}
6482 ins_pipe( pipe_slow );
6483 %}
6484
6485 instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{
6486 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
6487 match(Set dst (MulVF src (LoadVector mem)));
6488 format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %}
6489 ins_encode %{
6490 int vector_len = 1;
6491 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6492 %}
6493 ins_pipe( pipe_slow );
6494 %}
6495
6496 instruct vmul16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
6497 predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
6498 match(Set dst (MulVF src1 src2));
6499 format %{ "vmulps $dst,$src1,$src2\t! mul packed16F" %}
6500 ins_encode %{
6501 int vector_len = 2;
6502 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6503 %}
6504 ins_pipe( pipe_slow );
6505 %}
6506
6507 instruct vmul16F_mem(vecZ dst, vecZ src, memory mem) %{
6508 predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
6509 match(Set dst (MulVF src (LoadVector mem)));
6510 format %{ "vmulps $dst,$src,$mem\t! mul packed16F" %}
6511 ins_encode %{
6512 int vector_len = 2;
6513 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6514 %}
6515 ins_pipe( pipe_slow );
6516 %}
6517
6518 // Doubles vector mul
6519 instruct vmul2D(vecX dst, vecX src) %{
6520 predicate(n->as_Vector()->length() == 2);
6521 match(Set dst (MulVD dst src));
6522 format %{ "mulpd $dst,$src\t! mul packed2D" %}
6523 ins_encode %{
6524 __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
6525 %}
6526 ins_pipe( pipe_slow );
6527 %}
6528
6529 instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
6530 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6531 match(Set dst (MulVD src1 src2));
6532 format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %}
6533 ins_encode %{
6534 int vector_len = 0;
6535 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6536 %}
6537 ins_pipe( pipe_slow );
6538 %}
6539
6540 instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
6541 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6542 match(Set dst (MulVD src (LoadVector mem)));
6543 format %{ "vmulpd $dst,$src,$mem\t! mul packed2D" %}
6544 ins_encode %{
6545 int vector_len = 0;
6546 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6547 %}
6548 ins_pipe( pipe_slow );
6549 %}
6550
6551 instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
6552 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6553 match(Set dst (MulVD src1 src2));
6554 format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %}
6555 ins_encode %{
6556 int vector_len = 1;
6557 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6558 %}
6559 ins_pipe( pipe_slow );
6560 %}
6561
6562 instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
6563 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
6564 match(Set dst (MulVD src (LoadVector mem)));
6565 format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %}
6566 ins_encode %{
6567 int vector_len = 1;
6568 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6569 %}
6570 ins_pipe( pipe_slow );
6571 %}
6572
// 512-bit packed-double multiply: dst = src1 * src2 (requires AVX-512,
// UseAVX > 2; vector_len 2 = 512-bit operands).
6573 instruct vmul8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
6574 predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
6575 match(Set dst (MulVD src1 src2));
// Fixed format string: the encode emits the unmasked vmulpd, so no "k0"
// mask register is printed — matches every other vector op format in
// this file (vsub8D_reg, vmul16F_reg, ...).
6576 format %{ "vmulpd $dst,$src1,$src2\t! mul packed8D" %}
6577 ins_encode %{
6578 int vector_len = 2;
6579 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6580 %}
6581 ins_pipe( pipe_slow );
6582 %}
6583
// 512-bit packed-double multiply with a folded vector load:
// dst = src * [mem] (requires AVX-512, UseAVX > 2; vector_len 2 = 512-bit).
6584 instruct vmul8D_mem(vecZ dst, vecZ src, memory mem) %{
6585 predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
6586 match(Set dst (MulVD src (LoadVector mem)));
// Fixed format string: the encode emits the unmasked vmulpd, so no "k0"
// mask register is printed — matches every other vector *_mem format in
// this file.
6587 format %{ "vmulpd $dst,$src,$mem\t! mul packed8D" %}
6588 ins_encode %{
6589 int vector_len = 2;
6590 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
6591 %}
6592 ins_pipe( pipe_slow );
6593 %}
6594
6595 // --------------------------------- DIV --------------------------------------
6596
6597 // Floats vector div
6598 instruct vdiv2F(vecD dst, vecD src) %{
6599 predicate(n->as_Vector()->length() == 2);
6600 match(Set dst (DivVF dst src));
6601 format %{ "divps $dst,$src\t! div packed2F" %}
6602 ins_encode %{
6603 __ divps($dst$$XMMRegister, $src$$XMMRegister);
6604 %}
6605 ins_pipe( pipe_slow );
6606 %}
6607
6608 instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
6609 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
6610 match(Set dst (DivVF src1 src2));
6611 format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %}
6612 ins_encode %{
6613 int vector_len = 0;
6614 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
6615 %}
6616 ins_pipe( pipe_slow );
6617 %}
6618
6619 instruct vdiv4F(vecX dst, vecX src) %{
6620 predicate(n->as_Vector()->length() == 4);
6621 match(Set dst (DivVF dst src));
6622 format %{ "divps $dst,$src\t! div packed4F" %}
6623 ins_encode %{
6624 __ divps($dst$$XMMRegister, $src$$XMMRegister);
6625 %}
6626 ins_pipe( pipe_slow );
6627 %}
6628
// AVX divide of packed 4 floats: dst = src1 / src2 (128-bit encoding).
instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps  $dst,$src1,$src2\t! div packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
6639
// AVX divide of packed 4 floats with the divisor loaded directly from memory.
instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps  $dst,$src,$mem\t! div packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
6650
// AVX divide of packed 8 floats: dst = src1 / src2; vector_len 1 = 256-bit encoding.
instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps  $dst,$src1,$src2\t! div packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
6661
// AVX divide of packed 8 floats with a 256-bit memory divisor operand.
instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps  $dst,$src,$mem\t! div packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
6672
// Divide packed 16 floats (512-bit, EVEX): dst = src1 / src2.
// Predicate tightened from UseAVX > 0 to UseAVX > 2: a vecZ/512-bit encoding
// (vector_len 2) requires AVX-512, matching every other vecZ instruct here.
instruct vdiv16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps  $dst,$src1,$src2\t! div packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
6683
// Divide packed 16 floats with a 512-bit memory divisor operand.
// Predicate tightened from UseAVX > 0 to UseAVX > 2 (512-bit vecZ needs AVX-512,
// consistent with the other vecZ instructs in this file).
instruct vdiv16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps  $dst,$src,$mem\t! div packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
6694
6695 // Doubles vector div
// In-place SSE divide of packed 2 doubles: dst /= src (divpd).
instruct vdiv2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD dst src));
  format %{ "divpd   $dst,$src\t! div packed2D" %}
  ins_encode %{
    __ divpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
6705
// AVX divide of packed 2 doubles: dst = src1 / src2 (128-bit encoding).
instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd  $dst,$src1,$src2\t! div packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
6716
// AVX divide of packed 2 doubles with the divisor loaded directly from memory.
instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd  $dst,$src,$mem\t! div packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
6727
// AVX divide of packed 4 doubles: dst = src1 / src2; vector_len 1 = 256-bit encoding.
instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd  $dst,$src1,$src2\t! div packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
6738
// AVX divide of packed 4 doubles with a 256-bit memory divisor operand.
instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd  $dst,$src,$mem\t! div packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
6749
// Divide packed 8 doubles (512-bit): requires AVX-512 (UseAVX > 2); vector_len 2.
instruct vdiv8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd  $dst,$src1,$src2\t! div packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
6760
// Divide packed 8 doubles with a 512-bit memory divisor; requires AVX-512.
instruct vdiv8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd  $dst,$src,$mem\t! div packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
6771
6772 // ------------------------------ Shift ---------------------------------------
6773
6774 // Left and right shift count vectors are the same on x86
6775 // (only lowest bits of xmm reg are used for count).
// Materialize a vector shift count: move the GP register count into the low
// dword of an XMM register. One instruct serves both left and right shift
// counts (only the low bits of the xmm reg are consumed by the shifts).
instruct vshiftcnt(vecS dst, rRegI cnt) %{
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "movd    $dst,$cnt\t! load shift count" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $cnt$$Register);
  %}
  ins_pipe( pipe_slow );
%}
6785
6786 // ------------------------------ LeftShift -----------------------------------
6787
6794 __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
6795 %}
6796 ins_pipe( pipe_slow );
6797 %}
6798
// In-place left shift of packed 2 shorts by an 8-bit immediate (SSE2 psllw).
instruct vsll2S_imm(vecS dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw   $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}
6808
// AVX left shift of packed 2 shorts by an xmm count: dst = src << shift.
instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
6819
// AVX left shift of packed 2 shorts by an immediate: dst = src << imm.
instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
6830
// In-place left shift of packed 4 shorts by an xmm count (SSE2 psllw).
instruct vsll4S(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw   $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
6840
// In-place left shift of packed 4 shorts by an 8-bit immediate.
instruct vsll4S_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw   $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}
6850
// AVX left shift of packed 4 shorts by an xmm count (128-bit encoding).
instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
6861
// AVX left shift of packed 4 shorts by an immediate (128-bit encoding).
instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
6872
// In-place left shift of packed 8 shorts by an xmm count (SSE2 psllw).
instruct vsll8S(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw   $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
6882
// In-place left shift of packed 8 shorts by an 8-bit immediate.
instruct vsll8S_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw   $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}
6892
// AVX left shift of packed 8 shorts by an xmm count (128-bit encoding).
instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
6903
// AVX left shift of packed 8 shorts by an immediate (128-bit encoding).
instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
6914
// Left shift of packed 16 shorts; 256-bit integer ops need UseAVX > 1 (vector_len 1).
instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
6925
// Left shift of packed 16 shorts by an immediate; requires UseAVX > 1 (256-bit).
instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
6936
// Left shift of packed 32 shorts (512-bit); requires UseAVX > 2 (vector_len 2).
instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
6947
// Left shift of packed 32 shorts by an immediate (512-bit, UseAVX > 2).
instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
6958
6959 // Integers vector left shift
// In-place left shift of packed 2 ints by an xmm count (SSE2 pslld).
instruct vsll2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld   $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
6969
// In-place left shift of packed 2 ints by an 8-bit immediate.
instruct vsll2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld   $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}
6979
// AVX left shift of packed 2 ints by an xmm count (128-bit encoding).
instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld  $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
6990
// AVX left shift of packed 2 ints by an immediate (128-bit encoding).
instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld  $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
7001
// In-place left shift of packed 4 ints by an xmm count (SSE2 pslld).
instruct vsll4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld   $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
7011
// In-place left shift of packed 4 ints by an 8-bit immediate.
instruct vsll4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld   $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}
7021
// AVX left shift of packed 4 ints by an xmm count (128-bit encoding).
instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld  $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
7032
// AVX left shift of packed 4 ints by an immediate (128-bit encoding).
instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld  $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
7043
// Left shift of packed 8 ints; 256-bit integer ops need UseAVX > 1 (vector_len 1).
instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld  $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
7054
// Left shift of packed 8 ints by an immediate; requires UseAVX > 1 (256-bit).
instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld  $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
7065
// Left shift of packed 16 ints (512-bit); requires UseAVX > 2 (vector_len 2).
instruct vsll16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld  $dst,$src,$shift\t! left shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
7076
// Left shift of packed 16 ints by an immediate (512-bit, UseAVX > 2).
instruct vsll16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld  $dst,$src,$shift\t! left shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
7087
7088 // Longs vector left shift
// In-place left shift of packed 2 longs by an xmm count (SSE2 psllq).
instruct vsll2L(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq   $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
7098
// In-place left shift of packed 2 longs by an 8-bit immediate.
instruct vsll2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq   $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}
7108
// AVX left shift of packed 2 longs by an xmm count (128-bit encoding).
instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq  $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
7119
// AVX left shift of packed 2 longs by an immediate (128-bit encoding).
instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq  $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
7130
// Left shift of packed 4 longs; 256-bit integer ops need UseAVX > 1 (vector_len 1).
instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq  $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
7141
// Left shift of packed 4 longs by an immediate; requires UseAVX > 1 (256-bit).
instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq  $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
7152
// Left shift of packed 8 longs (512-bit); requires UseAVX > 2 (vector_len 2).
instruct vsll8L_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq  $dst,$src,$shift\t! left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
7163
// Left shift of packed 8 longs by an immediate (512-bit, UseAVX > 2).
instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq  $dst,$src,$shift\t! left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
7174
7175 // ----------------------- LogicalRightShift -----------------------------------
7176
// Shorts vector logical right shift produces an incorrect Java result for
// negative data, because Java code converts short values into ints with sign
// extension before a shift. But char vectors are fine, since chars are
// unsigned values.
7181
// In-place logical right shift of packed 2 shorts by an xmm count (SSE2 psrlw).
instruct vsrl2S(vecS dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw   $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
7191
// In-place logical right shift of packed 2 shorts by an 8-bit immediate.
instruct vsrl2S_imm(vecS dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw   $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}
7201
// AVX logical right shift of packed 2 shorts by an xmm count (128-bit encoding).
instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
7212
// AVX logical right shift of packed 2 shorts by an immediate (128-bit encoding).
instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
7223
// In-place logical right shift of packed 4 shorts by an xmm count.
instruct vsrl4S(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw   $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
7233
// In-place logical right shift of packed 4 shorts by an 8-bit immediate.
instruct vsrl4S_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw   $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}
7243
// AVX logical right shift of packed 4 shorts by an xmm count (128-bit encoding).
instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
7254
// AVX logical right shift of packed 4 shorts by an immediate (128-bit encoding).
instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
7265
// In-place logical right shift of packed 8 shorts by an xmm count.
instruct vsrl8S(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw   $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
7275
// In-place logical right shift of packed 8 shorts by an 8-bit immediate.
instruct vsrl8S_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw   $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}
7285
// AVX logical right shift of packed 8 shorts by an xmm count (128-bit encoding).
instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
7296
// AVX logical right shift of packed 8 shorts by an immediate (128-bit encoding).
instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
7307
// Logical right shift of packed 16 shorts; needs UseAVX > 1 (vector_len 1, 256-bit).
instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
7318
// Logical right shift of packed 16 shorts by an immediate (UseAVX > 1, 256-bit).
instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
7329
// Logical right shift of packed 32 shorts (512-bit); requires UseAVX > 2.
instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
7340
// Logical right shift of packed 32 shorts by an immediate (512-bit, UseAVX > 2).
instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
7351
7352 // Integers vector logical right shift
// In-place logical right shift of packed 2 ints by an xmm count (SSE2 psrld).
instruct vsrl2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld   $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
7362
// In-place logical right shift of packed 2 ints by an 8-bit immediate.
instruct vsrl2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld   $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}
7372
// AVX logical right shift of packed 2 ints by an xmm count (128-bit encoding).
instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
7383
// AVX logical right shift of packed 2 ints by an immediate (128-bit encoding).
instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
7394
// In-place logical right shift of packed 4 ints by an xmm count.
instruct vsrl4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld   $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
7404
// In-place logical right shift of packed 4 ints by an 8-bit immediate.
instruct vsrl4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld   $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}
7414
// AVX logical right shift of packed 4 ints by an xmm count (128-bit encoding).
instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
7425
// AVX logical right shift of packed 4 ints by an immediate (128-bit encoding).
instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
7436
// Logical right shift of packed 8 ints; needs UseAVX > 1 (vector_len 1, 256-bit).
instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
7447
// Logical right shift of packed 8 ints by an immediate (UseAVX > 1, 256-bit).
instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
7458
// Logical right shift of packed 16 ints (512-bit); requires UseAVX > 2.
instruct vsrl16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
7469
// Logical right shift of packed 16 ints by an immediate (512-bit, UseAVX > 2).
instruct vsrl16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
7480
7481 // Longs vector logical right shift
// In-place logical right shift of packed 2 longs by an xmm count (SSE2 psrlq).
instruct vsrl2L(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq   $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
7491
// In-place logical right shift of packed 2 longs by an 8-bit immediate.
instruct vsrl2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq   $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}
7501
// AVX logical right shift of packed 2 longs by an xmm count (128-bit encoding).
instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
7512
// AVX logical right shift of packed 2 longs by an immediate (128-bit encoding).
instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
7523
7524 instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{
7525 predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
7526 match(Set dst (URShiftVL src shift));
7527 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
7528 ins_encode %{
7529 int vector_len = 1;
7530 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7531 %}
7532 ins_pipe( pipe_slow );
7533 %}
7534
7535 instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
7536 predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
7537 match(Set dst (URShiftVL src shift));
7538 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
7539 ins_encode %{
7540 int vector_len = 1;
7541 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7542 %}
7543 ins_pipe( pipe_slow );
7544 %}
7545
7546 instruct vsrl8L_reg(vecZ dst, vecZ src, vecS shift) %{
7547 predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
7548 match(Set dst (URShiftVL src shift));
7549 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
7550 ins_encode %{
7551 int vector_len = 2;
7552 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7553 %}
7554 ins_pipe( pipe_slow );
7555 %}
7556
7557 instruct vsrl8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
7558 predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
7559 match(Set dst (URShiftVL src shift));
7560 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
7561 ins_encode %{
7562 int vector_len = 2;
7563 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7564 %}
7565 ins_pipe( pipe_slow );
7566 %}
7567
7568 // ------------------- ArithmeticRightShift -----------------------------------
7569
7570 // Shorts/Chars vector arithmetic right shift
//
// Pattern mirrors the logical-shift sections above:
//  - plain/_imm variants are destructive two-operand SSE forms (match rule
//    repeats dst as the first input);
//  - _reg/_reg_imm variants are three-operand AVX forms guarded by UseAVX,
//    with vector_len 0/1/2 matching vecS..vecX / vecY / vecZ operand widths.
// The shift count is either an XMM register (vecS) or an immI8 constant.
7571 instruct vsra2S(vecS dst, vecS shift) %{
7572 predicate(n->as_Vector()->length() == 2);
7573 match(Set dst (RShiftVS dst shift));
7574 format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
7575 ins_encode %{
7576 __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
7577 %}
7578 ins_pipe( pipe_slow );
7579 %}
7580
7581 instruct vsra2S_imm(vecS dst, immI8 shift) %{
7582 predicate(n->as_Vector()->length() == 2);
7583 match(Set dst (RShiftVS dst shift));
7584 format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
7585 ins_encode %{
7586 __ psraw($dst$$XMMRegister, (int)$shift$$constant);
7587 %}
7588 ins_pipe( pipe_slow );
7589 %}
7590
7591 instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{
7592 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7593 match(Set dst (RShiftVS src shift));
7594 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
7595 ins_encode %{
7596 int vector_len = 0;
7597 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7598 %}
7599 ins_pipe( pipe_slow );
7600 %}
7601
7602 instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
7603 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7604 match(Set dst (RShiftVS src shift));
7605 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
7606 ins_encode %{
7607 int vector_len = 0;
7608 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7609 %}
7610 ins_pipe( pipe_slow );
7611 %}
7612
// 4-element (vecD) variants of the same four forms.
7613 instruct vsra4S(vecD dst, vecS shift) %{
7614 predicate(n->as_Vector()->length() == 4);
7615 match(Set dst (RShiftVS dst shift));
7616 format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
7617 ins_encode %{
7618 __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
7619 %}
7620 ins_pipe( pipe_slow );
7621 %}
7622
7623 instruct vsra4S_imm(vecD dst, immI8 shift) %{
7624 predicate(n->as_Vector()->length() == 4);
7625 match(Set dst (RShiftVS dst shift));
7626 format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
7627 ins_encode %{
7628 __ psraw($dst$$XMMRegister, (int)$shift$$constant);
7629 %}
7630 ins_pipe( pipe_slow );
7631 %}
7632
7633 instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{
7634 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7635 match(Set dst (RShiftVS src shift));
7636 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
7637 ins_encode %{
7638 int vector_len = 0;
7639 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7640 %}
7641 ins_pipe( pipe_slow );
7642 %}
7643
7644 instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
7645 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7646 match(Set dst (RShiftVS src shift));
7647 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
7648 ins_encode %{
7649 int vector_len = 0;
7650 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7651 %}
7652 ins_pipe( pipe_slow );
7653 %}
7654
// 8-element (vecX, full 128-bit) variants.
7655 instruct vsra8S(vecX dst, vecS shift) %{
7656 predicate(n->as_Vector()->length() == 8);
7657 match(Set dst (RShiftVS dst shift));
7658 format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
7659 ins_encode %{
7660 __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
7661 %}
7662 ins_pipe( pipe_slow );
7663 %}
7664
7665 instruct vsra8S_imm(vecX dst, immI8 shift) %{
7666 predicate(n->as_Vector()->length() == 8);
7667 match(Set dst (RShiftVS dst shift));
7668 format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
7669 ins_encode %{
7670 __ psraw($dst$$XMMRegister, (int)$shift$$constant);
7671 %}
7672 ins_pipe( pipe_slow );
7673 %}
7674
7675 instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{
7676 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
7677 match(Set dst (RShiftVS src shift));
7678 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
7679 ins_encode %{
7680 int vector_len = 0;
7681 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7682 %}
7683 ins_pipe( pipe_slow );
7684 %}
7685
7686 instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
7687 predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
7688 match(Set dst (RShiftVS src shift));
7689 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
7690 ins_encode %{
7691 int vector_len = 0;
7692 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7693 %}
7694 ins_pipe( pipe_slow );
7695 %}
7696
// 16-element (vecY, 256-bit) variants: AVX-only, vector_len = 1.
7697 instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{
7698 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
7699 match(Set dst (RShiftVS src shift));
7700 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
7701 ins_encode %{
7702 int vector_len = 1;
7703 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7704 %}
7705 ins_pipe( pipe_slow );
7706 %}
7707
7708 instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
7709 predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
7710 match(Set dst (RShiftVS src shift));
7711 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
7712 ins_encode %{
7713 int vector_len = 1;
7714 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7715 %}
7716 ins_pipe( pipe_slow );
7717 %}
7718
// 32-element (vecZ, 512-bit) variants: UseAVX > 2, vector_len = 2.
7719 instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{
7720 predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
7721 match(Set dst (RShiftVS src shift));
7722 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
7723 ins_encode %{
7724 int vector_len = 2;
7725 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7726 %}
7727 ins_pipe( pipe_slow );
7728 %}
7729
7730 instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
7731 predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
7732 match(Set dst (RShiftVS src shift));
7733 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
7734 ins_encode %{
7735 int vector_len = 2;
7736 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7737 %}
7738 ins_pipe( pipe_slow );
7739 %}
7740
7741 // Integers vector arithmetic right shift
//
// Same four-way pattern as the short shifts above, using psrad/vpsrad:
// destructive SSE forms (dst repeated in the match rule) plus AVX
// three-operand forms with vector_len 0/1/2 for 128/256/512-bit operands.
7742 instruct vsra2I(vecD dst, vecS shift) %{
7743 predicate(n->as_Vector()->length() == 2);
7744 match(Set dst (RShiftVI dst shift));
7745 format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
7746 ins_encode %{
7747 __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
7748 %}
7749 ins_pipe( pipe_slow );
7750 %}
7751
7752 instruct vsra2I_imm(vecD dst, immI8 shift) %{
7753 predicate(n->as_Vector()->length() == 2);
7754 match(Set dst (RShiftVI dst shift));
7755 format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
7756 ins_encode %{
7757 __ psrad($dst$$XMMRegister, (int)$shift$$constant);
7758 %}
7759 ins_pipe( pipe_slow );
7760 %}
7761
7762 instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{
7763 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7764 match(Set dst (RShiftVI src shift));
7765 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
7766 ins_encode %{
7767 int vector_len = 0;
7768 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7769 %}
7770 ins_pipe( pipe_slow );
7771 %}
7772
7773 instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
7774 predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
7775 match(Set dst (RShiftVI src shift));
7776 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
7777 ins_encode %{
7778 int vector_len = 0;
7779 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7780 %}
7781 ins_pipe( pipe_slow );
7782 %}
7783
// 4-element (vecX, full 128-bit) variants.
7784 instruct vsra4I(vecX dst, vecS shift) %{
7785 predicate(n->as_Vector()->length() == 4);
7786 match(Set dst (RShiftVI dst shift));
7787 format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
7788 ins_encode %{
7789 __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
7790 %}
7791 ins_pipe( pipe_slow );
7792 %}
7793
7794 instruct vsra4I_imm(vecX dst, immI8 shift) %{
7795 predicate(n->as_Vector()->length() == 4);
7796 match(Set dst (RShiftVI dst shift));
7797 format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
7798 ins_encode %{
7799 __ psrad($dst$$XMMRegister, (int)$shift$$constant);
7800 %}
7801 ins_pipe( pipe_slow );
7802 %}
7803
7804 instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{
7805 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7806 match(Set dst (RShiftVI src shift));
7807 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
7808 ins_encode %{
7809 int vector_len = 0;
7810 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7811 %}
7812 ins_pipe( pipe_slow );
7813 %}
7814
7815 instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
7816 predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
7817 match(Set dst (RShiftVI src shift));
7818 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
7819 ins_encode %{
7820 int vector_len = 0;
7821 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7822 %}
7823 ins_pipe( pipe_slow );
7824 %}
7825
// 8-element (vecY, 256-bit) variants: UseAVX > 1, vector_len = 1.
7826 instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{
7827 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
7828 match(Set dst (RShiftVI src shift));
7829 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
7830 ins_encode %{
7831 int vector_len = 1;
7832 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7833 %}
7834 ins_pipe( pipe_slow );
7835 %}
7836
7837 instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
7838 predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
7839 match(Set dst (RShiftVI src shift));
7840 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
7841 ins_encode %{
7842 int vector_len = 1;
7843 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7844 %}
7845 ins_pipe( pipe_slow );
7846 %}
7847
// 16-element (vecZ, 512-bit) variants: UseAVX > 2, vector_len = 2.
7848 instruct vsra16I_reg(vecZ dst, vecZ src, vecS shift) %{
7849 predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
7850 match(Set dst (RShiftVI src shift));
7851 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %}
7852 ins_encode %{
7853 int vector_len = 2;
7854 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
7855 %}
7856 ins_pipe( pipe_slow );
7857 %}
7858
7859 instruct vsra16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
7860 predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
7861 match(Set dst (RShiftVI src shift));
7862 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %}
7863 ins_encode %{
7864 int vector_len = 2;
7865 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
7866 %}
7867 ins_pipe( pipe_slow );
7868 %}
7869
7870 // There are no longs vector arithmetic right shift instructions.
7871
7872
7873 // --------------------------------- AND --------------------------------------
7874
// Bitwise AND of vectors. These operate on raw bytes, so the predicates key
// on length_in_bytes rather than element count (element type is irrelevant
// for bitwise ops). Non-AVX forms are destructive (dst repeated in the match
// rule); _reg forms are three-operand AVX; _mem forms fold a vector load
// (LoadVector) directly into the AND as the second operand.
7875 instruct vand4B(vecS dst, vecS src) %{
7876 predicate(n->as_Vector()->length_in_bytes() == 4);
7877 match(Set dst (AndV dst src));
7878 format %{ "pand $dst,$src\t! and vectors (4 bytes)" %}
7879 ins_encode %{
7880 __ pand($dst$$XMMRegister, $src$$XMMRegister);
7881 %}
7882 ins_pipe( pipe_slow );
7883 %}
7884
7885 instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
7886 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
7887 match(Set dst (AndV src1 src2));
7888 format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %}
7889 ins_encode %{
7890 int vector_len = 0;
7891 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7892 %}
7893 ins_pipe( pipe_slow );
7894 %}
7895
7896 instruct vand8B(vecD dst, vecD src) %{
7897 predicate(n->as_Vector()->length_in_bytes() == 8);
7898 match(Set dst (AndV dst src));
7899 format %{ "pand $dst,$src\t! and vectors (8 bytes)" %}
7900 ins_encode %{
7901 __ pand($dst$$XMMRegister, $src$$XMMRegister);
7902 %}
7903 ins_pipe( pipe_slow );
7904 %}
7905
7906 instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
7907 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
7908 match(Set dst (AndV src1 src2));
7909 format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %}
7910 ins_encode %{
7911 int vector_len = 0;
7912 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7913 %}
7914 ins_pipe( pipe_slow );
7915 %}
7916
7917 instruct vand16B(vecX dst, vecX src) %{
7918 predicate(n->as_Vector()->length_in_bytes() == 16);
7919 match(Set dst (AndV dst src));
7920 format %{ "pand $dst,$src\t! and vectors (16 bytes)" %}
7921 ins_encode %{
7922 __ pand($dst$$XMMRegister, $src$$XMMRegister);
7923 %}
7924 ins_pipe( pipe_slow );
7925 %}
7926
7927 instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
7928 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
7929 match(Set dst (AndV src1 src2));
7930 format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %}
7931 ins_encode %{
7932 int vector_len = 0;
7933 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7934 %}
7935 ins_pipe( pipe_slow );
7936 %}
7937
// Memory-operand form: second AND input comes straight from memory.
7938 instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
7939 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
7940 match(Set dst (AndV src (LoadVector mem)));
7941 format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %}
7942 ins_encode %{
7943 int vector_len = 0;
7944 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7945 %}
7946 ins_pipe( pipe_slow );
7947 %}
7948
7949 instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
7950 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
7951 match(Set dst (AndV src1 src2));
7952 format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %}
7953 ins_encode %{
7954 int vector_len = 1;
7955 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7956 %}
7957 ins_pipe( pipe_slow );
7958 %}
7959
7960 instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
7961 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
7962 match(Set dst (AndV src (LoadVector mem)));
7963 format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %}
7964 ins_encode %{
7965 int vector_len = 1;
7966 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7967 %}
7968 ins_pipe( pipe_slow );
7969 %}
7970
7971 instruct vand64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
7972 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
7973 match(Set dst (AndV src1 src2));
7974 format %{ "vpand $dst,$src1,$src2\t! and vectors (64 bytes)" %}
7975 ins_encode %{
7976 int vector_len = 2;
7977 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
7978 %}
7979 ins_pipe( pipe_slow );
7980 %}
7981
7982 instruct vand64B_mem(vecZ dst, vecZ src, memory mem) %{
7983 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
7984 match(Set dst (AndV src (LoadVector mem)));
7985 format %{ "vpand $dst,$src,$mem\t! and vectors (64 bytes)" %}
7986 ins_encode %{
7987 int vector_len = 2;
7988 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
7989 %}
7990 ins_pipe( pipe_slow );
7991 %}
7992
7993 // --------------------------------- OR ---------------------------------------
7994
// Bitwise OR of vectors; structurally identical to the AND section:
// byte-length predicates, destructive SSE form, three-operand AVX _reg form,
// and _mem form folding a LoadVector into the second operand.
7995 instruct vor4B(vecS dst, vecS src) %{
7996 predicate(n->as_Vector()->length_in_bytes() == 4);
7997 match(Set dst (OrV dst src));
7998 format %{ "por $dst,$src\t! or vectors (4 bytes)" %}
7999 ins_encode %{
8000 __ por($dst$$XMMRegister, $src$$XMMRegister);
8001 %}
8002 ins_pipe( pipe_slow );
8003 %}
8004
8005 instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
8006 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
8007 match(Set dst (OrV src1 src2));
8008 format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %}
8009 ins_encode %{
8010 int vector_len = 0;
8011 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8012 %}
8013 ins_pipe( pipe_slow );
8014 %}
8015
8016 instruct vor8B(vecD dst, vecD src) %{
8017 predicate(n->as_Vector()->length_in_bytes() == 8);
8018 match(Set dst (OrV dst src));
8019 format %{ "por $dst,$src\t! or vectors (8 bytes)" %}
8020 ins_encode %{
8021 __ por($dst$$XMMRegister, $src$$XMMRegister);
8022 %}
8023 ins_pipe( pipe_slow );
8024 %}
8025
8026 instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
8027 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
8028 match(Set dst (OrV src1 src2));
8029 format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %}
8030 ins_encode %{
8031 int vector_len = 0;
8032 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8033 %}
8034 ins_pipe( pipe_slow );
8035 %}
8036
8037 instruct vor16B(vecX dst, vecX src) %{
8038 predicate(n->as_Vector()->length_in_bytes() == 16);
8039 match(Set dst (OrV dst src));
8040 format %{ "por $dst,$src\t! or vectors (16 bytes)" %}
8041 ins_encode %{
8042 __ por($dst$$XMMRegister, $src$$XMMRegister);
8043 %}
8044 ins_pipe( pipe_slow );
8045 %}
8046
8047 instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
8048 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
8049 match(Set dst (OrV src1 src2));
8050 format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %}
8051 ins_encode %{
8052 int vector_len = 0;
8053 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8054 %}
8055 ins_pipe( pipe_slow );
8056 %}
8057
8058 instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
8059 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
8060 match(Set dst (OrV src (LoadVector mem)));
8061 format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %}
8062 ins_encode %{
8063 int vector_len = 0;
8064 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8065 %}
8066 ins_pipe( pipe_slow );
8067 %}
8068
8069 instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
8070 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
8071 match(Set dst (OrV src1 src2));
8072 format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %}
8073 ins_encode %{
8074 int vector_len = 1;
8075 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8076 %}
8077 ins_pipe( pipe_slow );
8078 %}
8079
8080 instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
8081 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
8082 match(Set dst (OrV src (LoadVector mem)));
8083 format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %}
8084 ins_encode %{
8085 int vector_len = 1;
8086 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8087 %}
8088 ins_pipe( pipe_slow );
8089 %}
8090
8091 instruct vor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
8092 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
8093 match(Set dst (OrV src1 src2));
8094 format %{ "vpor $dst,$src1,$src2\t! or vectors (64 bytes)" %}
8095 ins_encode %{
8096 int vector_len = 2;
8097 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8098 %}
8099 ins_pipe( pipe_slow );
8100 %}
8101
8102 instruct vor64B_mem(vecZ dst, vecZ src, memory mem) %{
8103 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
8104 match(Set dst (OrV src (LoadVector mem)));
8105 format %{ "vpor $dst,$src,$mem\t! or vectors (64 bytes)" %}
8106 ins_encode %{
8107 int vector_len = 2;
8108 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8109 %}
8110 ins_pipe( pipe_slow );
8111 %}
8112
8113 // --------------------------------- XOR --------------------------------------
8114
// Bitwise XOR of vectors; same layout as the AND/OR sections above:
// byte-length predicates, destructive SSE form, three-operand AVX _reg form,
// and _mem form folding a LoadVector into the second operand.
8115 instruct vxor4B(vecS dst, vecS src) %{
8116 predicate(n->as_Vector()->length_in_bytes() == 4);
8117 match(Set dst (XorV dst src));
8118 format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %}
8119 ins_encode %{
8120 __ pxor($dst$$XMMRegister, $src$$XMMRegister);
8121 %}
8122 ins_pipe( pipe_slow );
8123 %}
8124
8125 instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
8126 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
8127 match(Set dst (XorV src1 src2));
8128 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
8129 ins_encode %{
8130 int vector_len = 0;
8131 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8132 %}
8133 ins_pipe( pipe_slow );
8134 %}
8135
8136 instruct vxor8B(vecD dst, vecD src) %{
8137 predicate(n->as_Vector()->length_in_bytes() == 8);
8138 match(Set dst (XorV dst src));
8139 format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %}
8140 ins_encode %{
8141 __ pxor($dst$$XMMRegister, $src$$XMMRegister);
8142 %}
8143 ins_pipe( pipe_slow );
8144 %}
8145
8146 instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
8147 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
8148 match(Set dst (XorV src1 src2));
8149 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
8150 ins_encode %{
8151 int vector_len = 0;
8152 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8153 %}
8154 ins_pipe( pipe_slow );
8155 %}
8156
8157 instruct vxor16B(vecX dst, vecX src) %{
8158 predicate(n->as_Vector()->length_in_bytes() == 16);
8159 match(Set dst (XorV dst src));
8160 format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %}
8161 ins_encode %{
8162 __ pxor($dst$$XMMRegister, $src$$XMMRegister);
8163 %}
8164 ins_pipe( pipe_slow );
8165 %}
8166
8167 instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
8168 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
8169 match(Set dst (XorV src1 src2));
8170 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
8171 ins_encode %{
8172 int vector_len = 0;
8173 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8174 %}
8175 ins_pipe( pipe_slow );
8176 %}
8177
8178 instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
8179 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
8180 match(Set dst (XorV src (LoadVector mem)));
8181 format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %}
8182 ins_encode %{
8183 int vector_len = 0;
8184 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8185 %}
8186 ins_pipe( pipe_slow );
8187 %}
8188
8189 instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
8190 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
8191 match(Set dst (XorV src1 src2));
8192 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
8193 ins_encode %{
8194 int vector_len = 1;
8195 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8196 %}
8197 ins_pipe( pipe_slow );
8198 %}
8199
8200 instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
8201 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
8202 match(Set dst (XorV src (LoadVector mem)));
8203 format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %}
8204 ins_encode %{
8205 int vector_len = 1;
8206 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8207 %}
8208 ins_pipe( pipe_slow );
8209 %}
8210
8211 instruct vxor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
8212 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
8213 match(Set dst (XorV src1 src2));
8214 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (64 bytes)" %}
8215 ins_encode %{
8216 int vector_len = 2;
8217 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
8218 %}
8219 ins_pipe( pipe_slow );
8220 %}
8221
8222 instruct vxor64B_mem(vecZ dst, vecZ src, memory mem) %{
8223 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
8224 match(Set dst (XorV src (LoadVector mem)));
8225 format %{ "vpxor $dst,$src,$mem\t! xor vectors (64 bytes)" %}
8226 ins_encode %{
8227 int vector_len = 2;
8228 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
8229 %}
8230 ins_pipe( pipe_slow );
8231 %}
8232
|