//
// Copyright (c) 2011, 2016, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name (register save type, C convention save type,
//                  ideal register type, encoding);
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers. 512-bit registers or 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre EVEX enabled architectures:
//   XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX enabled architectures:
//   XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
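//
// For example, the first definition below,
//   reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
// reads as: allocator slot XMM0 is Save-On-Call under both the register
// save type and the C convention save type, is spilled as a float
// (Op_RegF), uses encoding 0 in generated opcodes, and maps to the first
// 32-bit word of the hardware register xmm0. A Float therefore occupies
// XMM0 alone, a Double the pair XMM0/XMM0b, and a full 512-bit vector
// XMM0 through XMM0p.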
//
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM31 preserved across function calls
//              XMM0-XMM3 might hold parameters

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

#ifdef _LP64

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));
reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

#endif // _LP64

#ifdef _LP64
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
#else
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64

alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
                   XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
                   XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
                   XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
                   XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
                   XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
                   XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
                   XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p
#ifdef _LP64
                  ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
                   XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                  ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                   XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                   XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                   );

// flags allocation class should be last.
alloc_class chunk2(RFLAGS);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);

// Class for pre evex float registers
reg_class float_reg_legacy(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15
#endif
                          );

// Class for evex float registers
reg_class float_reg_evex(XMM0,
                         XMM1,
                         XMM2,
                         XMM3,
                         XMM4,
                         XMM5,
                         XMM6,
                         XMM7
#ifdef _LP64
                        ,XMM8,
                         XMM9,
                         XMM10,
                         XMM11,
                         XMM12,
                         XMM13,
                         XMM14,
                         XMM15,
                         XMM16,
                         XMM17,
                         XMM18,
                         XMM19,
                         XMM20,
                         XMM21,
                         XMM22,
                         XMM23,
                         XMM24,
                         XMM25,
                         XMM26,
                         XMM27,
                         XMM28,
                         XMM29,
                         XMM30,
                         XMM31
#endif
                        );

reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
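// A reg_class_dynamic resolves to one of the two statically defined classes
// named above according to the %{ %} predicate, which is evaluated in the
// running VM rather than when this file is compiled. So float_reg covers
// XMM0-XMM31 on EVEX-capable CPUs and falls back to the legacy XMM0-XMM15
// set otherwise. The double and vector classes below follow the same
// pattern.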

// Class for pre evex double registers
reg_class double_reg_legacy(XMM0,  XMM0b,
                            XMM1,  XMM1b,
                            XMM2,  XMM2b,
                            XMM3,  XMM3b,
                            XMM4,  XMM4b,
                            XMM5,  XMM5b,
                            XMM6,  XMM6b,
                            XMM7,  XMM7b
#ifdef _LP64
                           ,XMM8,  XMM8b,
                            XMM9,  XMM9b,
                            XMM10, XMM10b,
                            XMM11, XMM11b,
                            XMM12, XMM12b,
                            XMM13, XMM13b,
                            XMM14, XMM14b,
                            XMM15, XMM15b
#endif
                           );

// Class for evex double registers
reg_class double_reg_evex(XMM0,  XMM0b,
                          XMM1,  XMM1b,
                          XMM2,  XMM2b,
                          XMM3,  XMM3b,
                          XMM4,  XMM4b,
                          XMM5,  XMM5b,
                          XMM6,  XMM6b,
                          XMM7,  XMM7b
#ifdef _LP64
                         ,XMM8,  XMM8b,
                          XMM9,  XMM9b,
                          XMM10, XMM10b,
                          XMM11, XMM11b,
                          XMM12, XMM12b,
                          XMM13, XMM13b,
                          XMM14, XMM14b,
                          XMM15, XMM15b,
                          XMM16, XMM16b,
                          XMM17, XMM17b,
                          XMM18, XMM18b,
                          XMM19, XMM19b,
                          XMM20, XMM20b,
                          XMM21, XMM21b,
                          XMM22, XMM22b,
                          XMM23, XMM23b,
                          XMM24, XMM24b,
                          XMM25, XMM25b,
                          XMM26, XMM26b,
                          XMM27, XMM27b,
                          XMM28, XMM28b,
                          XMM29, XMM29b,
                          XMM30, XMM30b,
                          XMM31, XMM31b
#endif
                         );

reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for pre evex 32bit vector registers
reg_class vectors_reg_legacy(XMM0,
                             XMM1,
                             XMM2,
                             XMM3,
                             XMM4,
                             XMM5,
                             XMM6,
                             XMM7
#ifdef _LP64
                            ,XMM8,
                             XMM9,
                             XMM10,
                             XMM11,
                             XMM12,
                             XMM13,
                             XMM14,
                             XMM15
#endif
                            );

// Class for evex 32bit vector registers
reg_class vectors_reg_evex(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15,
                           XMM16,
                           XMM17,
                           XMM18,
                           XMM19,
                           XMM20,
                           XMM21,
                           XMM22,
                           XMM23,
                           XMM24,
                           XMM25,
                           XMM26,
                           XMM27,
                           XMM28,
                           XMM29,
                           XMM30,
                           XMM31
#endif
                          );

reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for pre evex 64bit vector registers
reg_class vectord_reg_legacy(XMM0,  XMM0b,
                             XMM1,  XMM1b,
                             XMM2,  XMM2b,
                             XMM3,  XMM3b,
                             XMM4,  XMM4b,
                             XMM5,  XMM5b,
                             XMM6,  XMM6b,
                             XMM7,  XMM7b
#ifdef _LP64
                            ,XMM8,  XMM8b,
                             XMM9,  XMM9b,
                             XMM10, XMM10b,
                             XMM11, XMM11b,
                             XMM12, XMM12b,
                             XMM13, XMM13b,
                             XMM14, XMM14b,
                             XMM15, XMM15b
#endif
                            );

// Class for evex 64bit vector registers
reg_class vectord_reg_evex(XMM0,  XMM0b,
                           XMM1,  XMM1b,
                           XMM2,  XMM2b,
                           XMM3,  XMM3b,
                           XMM4,  XMM4b,
                           XMM5,  XMM5b,
                           XMM6,  XMM6b,
                           XMM7,  XMM7b
#ifdef _LP64
                          ,XMM8,  XMM8b,
                           XMM9,  XMM9b,
                           XMM10, XMM10b,
                           XMM11, XMM11b,
                           XMM12, XMM12b,
                           XMM13, XMM13b,
                           XMM14, XMM14b,
                           XMM15, XMM15b,
                           XMM16, XMM16b,
                           XMM17, XMM17b,
                           XMM18, XMM18b,
                           XMM19, XMM19b,
                           XMM20, XMM20b,
                           XMM21, XMM21b,
                           XMM22, XMM22b,
                           XMM23, XMM23b,
                           XMM24, XMM24b,
                           XMM25, XMM25b,
                           XMM26, XMM26b,
                           XMM27, XMM27b,
                           XMM28, XMM28b,
                           XMM29, XMM29b,
                           XMM30, XMM30b,
                           XMM31, XMM31b
#endif
                          );

reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for pre evex 128bit vector registers
reg_class vectorx_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,
                             XMM1,  XMM1b,  XMM1c,  XMM1d,
                             XMM2,  XMM2b,  XMM2c,  XMM2d,
                             XMM3,  XMM3b,  XMM3c,  XMM3d,
                             XMM4,  XMM4b,  XMM4c,  XMM4d,
                             XMM5,  XMM5b,  XMM5c,  XMM5d,
                             XMM6,  XMM6b,  XMM6c,  XMM6d,
                             XMM7,  XMM7b,  XMM7c,  XMM7d
#ifdef _LP64
                            ,XMM8,  XMM8b,  XMM8c,  XMM8d,
                             XMM9,  XMM9b,  XMM9c,  XMM9d,
                             XMM10, XMM10b, XMM10c, XMM10d,
                             XMM11, XMM11b, XMM11c, XMM11d,
                             XMM12, XMM12b, XMM12c, XMM12d,
                             XMM13, XMM13b, XMM13c, XMM13d,
                             XMM14, XMM14b, XMM14c, XMM14d,
                             XMM15, XMM15b, XMM15c, XMM15d
#endif
                            );

// Class for evex 128bit vector registers
reg_class vectorx_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,
                           XMM1,  XMM1b,  XMM1c,  XMM1d,
                           XMM2,  XMM2b,  XMM2c,  XMM2d,
                           XMM3,  XMM3b,  XMM3c,  XMM3d,
                           XMM4,  XMM4b,  XMM4c,  XMM4d,
                           XMM5,  XMM5b,  XMM5c,  XMM5d,
                           XMM6,  XMM6b,  XMM6c,  XMM6d,
                           XMM7,  XMM7b,  XMM7c,  XMM7d
#ifdef _LP64
                          ,XMM8,  XMM8b,  XMM8c,  XMM8d,
                           XMM9,  XMM9b,  XMM9c,  XMM9d,
                           XMM10, XMM10b, XMM10c, XMM10d,
                           XMM11, XMM11b, XMM11c, XMM11d,
                           XMM12, XMM12b, XMM12c, XMM12d,
                           XMM13, XMM13b, XMM13c, XMM13d,
                           XMM14, XMM14b, XMM14c, XMM14d,
                           XMM15, XMM15b, XMM15c, XMM15d,
                           XMM16, XMM16b, XMM16c, XMM16d,
                           XMM17, XMM17b, XMM17c, XMM17d,
                           XMM18, XMM18b, XMM18c, XMM18d,
                           XMM19, XMM19b, XMM19c, XMM19d,
                           XMM20, XMM20b, XMM20c, XMM20d,
                           XMM21, XMM21b, XMM21c, XMM21d,
                           XMM22, XMM22b, XMM22c, XMM22d,
                           XMM23, XMM23b, XMM23c, XMM23d,
                           XMM24, XMM24b, XMM24c, XMM24d,
                           XMM25, XMM25b, XMM25c, XMM25d,
                           XMM26, XMM26b, XMM26c, XMM26d,
                           XMM27, XMM27b, XMM27c, XMM27d,
                           XMM28, XMM28b, XMM28c, XMM28d,
                           XMM29, XMM29b, XMM29c, XMM29d,
                           XMM30, XMM30b, XMM30c, XMM30d,
                           XMM31, XMM31b, XMM31c, XMM31d
#endif
                          );

reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for pre evex 256bit vector registers
reg_class vectory_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
                             XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
                             XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
                             XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
                             XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
                             XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
                             XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
                             XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
#ifdef _LP64
                            ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
                             XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
                             XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                             XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                             XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                             XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                             XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                             XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                            );

// Class for evex 256bit vector registers
reg_class vectory_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
                           XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
                           XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
                           XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
                           XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
                           XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
                           XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
                           XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
#ifdef _LP64
                          ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
                           XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
                           XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
                           XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
                           XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h
#endif
                          );

reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
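// The 512-bit class below has no legacy/evex split: 512-bit vectors exist
// only on EVEX-capable (AVX-512) hardware, so a single class suffices.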
// Class for all 512bit vector registers
reg_class vectorz_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
                      XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
                      XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
                      XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
                      XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
                      XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
                      XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
                      XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p
#ifdef _LP64
                     ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
                      XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
                      XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                      XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                      XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                      XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                      XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                      XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                     ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                      XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                      XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                      XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                      XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                      XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                      XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                      XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                      XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                      XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                      XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                      XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                      XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                      XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                      XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                      XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                      );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description

source_hpp %{
// Header information of the source block.
// Method declarations/definitions which are used outside
// the ad-scope can conveniently be defined here.
//
// To keep related declarations/definitions/uses close together,
// we switch between source %{ %} and source_hpp %{ %} freely as needed.

class NativeJump;

class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // Number of relocations needed by a call trampoline stub.
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};

class HandlerImpl {

 public:

  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  static uint size_exception_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return NativeJump::instruction_size;
  }

#ifdef _LP64
  static uint size_deopt_handler() {
    // three 5-byte instructions
    return 15;
  }
#else
  static uint size_deopt_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // The deopt handler starts out as a jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return 5 + NativeJump::instruction_size; // pushl(); jmp;
  }
#endif
};

%} // end source_hpp

source %{

#include "opto/addnode.hpp"

// Emit exception handler code.
// Stuff framesize into a register and call a VM stub routine.
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0; // CodeBuffer::expand failed
  }
  int offset = __ offset();
  __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}

// Emit deopt handler code.
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0; // CodeBuffer::expand failed
  }
  int offset = __ offset();

#ifdef _LP64
  address the_pc = (address) __ pc();
  Label next;
  // push the address "the_pc" on the stack without destroying any registers,
  // as they all may be live.
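  // The call below targets the immediately following instruction, so its only
  // effect is to push its own return address (the address of "next"); the
  // subptr then subtracts the bytes emitted since "the_pc" from that stack
  // slot, leaving exactly "the_pc" there.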

  // push address of "next"
  __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
  __ bind(next);
  // adjust it so it matches "the_pc"
  __ subptr(Address(rsp, 0), __ offset() - offset);
#else
  InternalAddress here(__ pc());
  __ pushptr(here.addr());
#endif

  __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
  __ end_a_stub();
  return offset;
}


//=============================================================================

// Float masks come from different places depending on platform.
#ifdef _LP64
static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
#else
static address float_signmask()  { return (address)float_signmask_pool; }
static address float_signflip()  { return (address)float_signflip_pool; }
static address double_signmask() { return (address)double_signmask_pool; }
static address double_signflip() { return (address)double_signflip_pool; }
#endif


const bool Matcher::match_rule_supported(int opcode) {
  if (!has_match_rule(opcode))
    return false;

  bool ret_value = true;
  switch (opcode) {
    case Op_PopCountI:
    case Op_PopCountL:
      if (!UsePopCountInstruction)
        ret_value = false;
      break;
    case Op_MulVI:
      if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
        ret_value = false;
      break;
    case Op_MulVL:
    case Op_MulReductionVL:
      if (VM_Version::supports_avx512dq() == false)
        ret_value = false;
      break;
    case Op_AddReductionVL:
      if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here
        ret_value = false;
      break;
    case Op_AddReductionVI:
      if (UseSSE < 3) // requires at least SSE3
        ret_value = false;
      break;
    case Op_MulReductionVI:
      if (UseSSE < 4) // requires at least SSE4
        ret_value = false;
      break;
    case Op_AddReductionVF:
    case Op_AddReductionVD:
    case Op_MulReductionVF:
    case Op_MulReductionVD:
      if (UseSSE < 1) // requires at least SSE
        ret_value = false;
      break;
    case Op_SqrtVD:
      if (UseAVX < 1) // enabled for AVX only
        ret_value = false;
      break;
    case Op_CompareAndSwapL:
#ifdef _LP64
    case Op_CompareAndSwapP:
#endif
      if (!VM_Version::supports_cx8())
        ret_value = false;
      break;
    case Op_CMoveVD:
      if (UseAVX < 1 || UseAVX > 2)
        ret_value = false;
      break;
    case Op_StrIndexOf:
      if (!UseSSE42Intrinsics)
        ret_value = false;
      break;
    case Op_StrIndexOfChar:
      if (!UseSSE42Intrinsics)
        ret_value = false;
      break;
    case Op_OnSpinWait:
      if (VM_Version::supports_on_spin_wait() == false)
        ret_value = false;
      break;
  }

  return ret_value; // By default match rules are supported.
}
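// For example, on a CPU without AVX-512DQ the Op_MulVL case above returns
// false, so C2 does not vectorize long multiplies there and leaves them scalar.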

const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
  // identify extra cases that we might want to provide match rules for
  // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
  bool ret_value = match_rule_supported(opcode);
  if (ret_value) {
    switch (opcode) {
      case Op_AddVB:
      case Op_SubVB:
        if ((vlen == 64) && (VM_Version::supports_avx512bw() == false))
          ret_value = false;
        break;
      case Op_URShiftVS:
      case Op_RShiftVS:
      case Op_LShiftVS:
      case Op_MulVS:
      case Op_AddVS:
      case Op_SubVS:
        if ((vlen == 32) && (VM_Version::supports_avx512bw() == false))
          ret_value = false;
        break;
      case Op_CMoveVD:
        if (vlen != 4)
          ret_value = false;
        break;
    }
  }

  return ret_value; // By default match rules are supported.
}

const bool Matcher::has_predicated_vectors(void) {
  bool ret_value = false;
  if (UseAVX > 2) {
    ret_value = VM_Version::supports_avx512vl();
  }

  return ret_value;
}

const int Matcher::float_pressure(int default_pressure_threshold) {
  int float_pressure_threshold = default_pressure_threshold;
#ifdef _LP64
  if (UseAVX > 2) {
    // Increase pressure threshold on machines with AVX3 which have
    // 2x more XMM registers.
    float_pressure_threshold = default_pressure_threshold * 2;
  }
#endif
  return float_pressure_threshold;
}

// Max vector size in bytes. 0 if not supported.
const int Matcher::vector_width_in_bytes(BasicType bt) {
  assert(is_java_primitive(bt), "only primitive type vectors");
  if (UseSSE < 2) return 0;
  // SSE2 supports 128bit vectors for all types.
  // AVX2 supports 256bit vectors for all types.
  // EVEX supports 512bit vectors for all types.
  int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
  // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
  if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
    size = (UseAVX > 2) ? 64 : 32;
  // Use flag to limit vector size.
  size = MIN2(size, (int)MaxVectorSize);
  // Minimum 2 values in vector (or 4 for bytes).
  switch (bt) {
  case T_DOUBLE:
  case T_LONG:
    if (size < 16) return 0;
    break;
  case T_FLOAT:
  case T_INT:
    if (size < 8) return 0;
    break;
  case T_BOOLEAN:
  case T_CHAR:
  case T_BYTE:
  case T_SHORT:
    if (size < 4) return 0;
    break;
  default:
    ShouldNotReachHere();
  }
  return size;
}

// Limits on vector size (number of elements) loaded into vector.
const int Matcher::max_vector_size(const BasicType bt) {
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}
const int Matcher::min_vector_size(const BasicType bt) {
  int max_size = max_vector_size(bt);
  // Min size which can be loaded into vector is 4 bytes.
  int size = (type2aelembytes(bt) == 1) ? 4 : 2;
  return MIN2(size, max_size);
}

// Vector ideal reg corresponding to specified size in bytes
const int Matcher::vector_ideal_reg(int size) {
  assert(MaxVectorSize >= size, "");
  switch(size) {
    case  4: return Op_VecS;
    case  8: return Op_VecD;
    case 16: return Op_VecX;
    case 32: return Op_VecY;
    case 64: return Op_VecZ;
  }
  ShouldNotReachHere();
  return 0;
}
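// Worked example: with UseAVX == 2 and MaxVectorSize >= 32,
// vector_width_in_bytes(T_INT) == (1 << 2) * 8 == 32 bytes, so
// max_vector_size(T_INT) == 32 / 4 == 8 elements and
// vector_ideal_reg(32) == Op_VecY.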

// Only lowest bits of xmm reg are used for vector shift count.
const int Matcher::vector_shift_count_ideal_reg(int size) {
  return Op_VecS;
}

// x86 supports misaligned vector loads/stores.
const bool Matcher::misaligned_vectors_ok() {
  return !AlignVector; // can be changed by flag
}

// x86 AES instructions are compatible with SunJCE expanded
// keys, hence we do not need to pass the original key to stubs
const bool Matcher::pass_original_key_for_aes() {
  return false;
}


const bool Matcher::convi2l_type_required = true;

// Check for shift by small constant as well
static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
  if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
      shift->in(2)->get_int() <= 3 &&
      // Are there other uses besides address expressions?
      !matcher->is_visited(shift)) {
    address_visited.set(shift->_idx); // Flag as address_visited
    mstack.push(shift->in(2), Matcher::Visit);
    Node *conv = shift->in(1);
#ifdef _LP64
    // Allow Matcher to match the rule which bypasses
    // ConvI2L operation for an array index on LP64
    // if the index value is positive.
    if (conv->Opcode() == Op_ConvI2L &&
        conv->as_Type()->type()->is_long()->_lo >= 0 &&
        // Are there other uses besides address expressions?
        !matcher->is_visited(conv)) {
      address_visited.set(conv->_idx); // Flag as address_visited
      mstack.push(conv->in(1), Matcher::Pre_Visit);
    } else
#endif
      mstack.push(conv, Matcher::Pre_Visit);
    return true;
  }
  return false;
}

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?
bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
  Node *off = m->in(AddPNode::Offset);
  if (off->is_Con()) {
    address_visited.test_set(m->_idx); // Flag as address_visited
    Node *adr = m->in(AddPNode::Address);

    // Intel can handle 2 adds in addressing mode
    // AtomicAdd is not an addressing expression.
    // Cheap to find it by looking for screwy base.
    if (adr->is_AddP() &&
        !adr->in(AddPNode::Base)->is_top() &&
        // Are there other uses besides address expressions?
        !is_visited(adr)) {
      address_visited.set(adr->_idx); // Flag as address_visited
      Node *shift = adr->in(AddPNode::Offset);
      if (!clone_shift(shift, this, mstack, address_visited)) {
        mstack.push(shift, Pre_Visit);
      }
      mstack.push(adr->in(AddPNode::Address), Pre_Visit);
      mstack.push(adr->in(AddPNode::Base), Pre_Visit);
    } else {
      mstack.push(adr, Pre_Visit);
    }

    // Clone X+offset as it also folds into most addressing expressions
    mstack.push(off, Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  } else if (clone_shift(off, this, mstack, address_visited)) {
    address_visited.test_set(m->_idx); // Flag as address_visited
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  }
  return false;
}

void Compile::reshape_address(AddPNode* addp) {
}
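// Net effect of the cloning above: an access such as base + (index << 2) + disp
// can be matched as a single x86 addressing mode, [base + index*4 + disp],
// rather than first computing the scaled index and sum into registers.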

// Helper methods for MachSpillCopyNode::implementation().
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st) {
  // In the 64-bit VM, size calculation is very complex, so the size is
  // obtained by emitting the instructions into a scratch buffer.
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
  assert(ireg == Op_VecS || // 32bit vector
         (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
         (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
         "no non-adjacent vector moves" );
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    switch (ireg) {
    case Op_VecS: // copy whole register
    case Op_VecD:
    case Op_VecX:
      __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      break;
    case Op_VecY:
      __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      break;
    case Op_VecZ:
      __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
      break;
    default:
      ShouldNotReachHere();
    }
    int size = __ offset() - offset;
#ifdef ASSERT
    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    assert(!do_size || size == 4, "incorrect size calculation");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    switch (ireg) {
    case Op_VecS:
    case Op_VecD:
    case Op_VecX:
      st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    case Op_VecY:
    case Op_VecZ:
      st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  return (UseAVX > 2) ? 6 : 4;
}

static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
                            int stack_offset, int reg, uint ireg, outputStream* st) {
  // In the 64-bit VM, size calculation is very complex, so the size is
  // obtained by emitting the instructions into a scratch buffer.
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecD:
        __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecX:
        __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecY:
        __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecZ:
        __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecD:
        __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecX:
        __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecY:
        __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecZ:
        __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
        break;
      default:
        ShouldNotReachHere();
      }
    }
    int size = __ offset() - offset;
#ifdef ASSERT
    int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4);
    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    assert(!do_size || size == (5+offset_size), "incorrect size calculation");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecD:
        st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecX:
        st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecY:
      case Op_VecZ:
        st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecD:
        st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecX:
        st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecY:
      case Op_VecZ:
        st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      default:
        ShouldNotReachHere();
      }
    }
#endif
  }
  bool is_single_byte = false;
  int vec_len = 0;
  if ((UseAVX > 2) && (stack_offset != 0)) {
    int tuple_type = Assembler::EVEX_FVM;
    int input_size = Assembler::EVEX_32bit;
    switch (ireg) {
    case Op_VecS:
      tuple_type = Assembler::EVEX_T1S;
      break;
    case Op_VecD:
      tuple_type = Assembler::EVEX_T1S;
      input_size = Assembler::EVEX_64bit;
      break;
    case Op_VecX:
      break;
    case Op_VecY:
      vec_len = 1;
      break;
    case Op_VecZ:
      vec_len = 2;
      break;
    }
    is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0);
  }
  int offset_size = 0;
  int size = 5;
  if (UseAVX > 2) {
    if (VM_Version::supports_avx512novl() && (vec_len == 2)) {
      offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
      size += 2; // Need an additional two bytes for EVEX encoding
    } else if (VM_Version::supports_avx512novl() && (vec_len < 2)) {
      offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
    } else {
      offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
      size += 2; // Need an additional two bytes for EVEX encoding
    }
  } else {
    offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
  }
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+offset_size;
}

static inline jint replicate4_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 32 bits.
  assert(width == 1 || width == 2, "only byte or short types here");
  int bit_width = width * 8;
  jint val = con;
  val &= (1 << bit_width) - 1; // mask off sign bits
  while(bit_width < 32) {
    val |= (val << bit_width);
    bit_width <<= 1;
  }
  return val;
}

static inline jlong replicate8_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 64 bits.
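  // e.g. replicate8_imm(0xAB, 1)   == 0xABABABABABABABAB and
  //      replicate8_imm(0x1234, 2) == 0x1234123412341234;
  //      replicate4_imm above does the same up to 32 bits.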
  assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
  int bit_width = width * 8;
  jlong val = con;
  val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits
  while(bit_width < 64) {
    val |= (val << bit_width);
    bit_width <<= 1;
  }
  return val;
}

#ifndef PRODUCT
void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("nop \t# %d bytes pad for loops and calls", _count);
}
#endif

void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
  MacroAssembler _masm(&cbuf);
  __ nop(_count);
}

uint MachNopNode::size(PhaseRegAlloc*) const {
  return _count;
}

#ifndef PRODUCT
void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("# breakpoint");
}
#endif

void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const {
  MacroAssembler _masm(&cbuf);
  __ int3();
}

uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
  return MachNode::size(ra_);
}

%}

encode %{

  enc_class call_epilog %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      MacroAssembler _masm(&cbuf);
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
  %}

%}


//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user-defined types which are used in
// instruction definitions.

// This operand applies only to EVEX targets, so only one version is needed.
operand vecZ() %{
  constraint(ALLOC_IN_RC(vectorz_reg));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}

// Comparison Code for FP conditional move
operand cmpOp_vcmppd() %{
  match(Bool);

  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0, "eq");
    less         (0x1, "lt");
    less_equal   (0x2, "le");
    not_equal    (0xC, "ne");
    greater_equal(0xD, "ge");
    greater      (0xE, "gt");
    // TODO: adlc fails to compile without the next two lines, reporting:
    // x86_64.ad(13987) Syntax Error: :In operand cmpOp_vcmppd: Do not support this encode constant: ' %{
    // equal' for overflow.
1795 overflow (0x20, "o"); // not really supported by the instruction 1796 no_overflow (0x21, "no"); // not really supported by the instruction 1797 %} 1798 %} 1799 1800 1801 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 1802 1803 // ============================================================================ 1804 1805 instruct ShouldNotReachHere() %{ 1806 match(Halt); 1807 format %{ "int3\t# ShouldNotReachHere" %} 1808 ins_encode %{ 1809 __ int3(); 1810 %} 1811 ins_pipe(pipe_slow); 1812 %} 1813 1814 // =================================EVEX special=============================== 1815 1816 instruct setMask(rRegI dst, rRegI src) %{ 1817 predicate(Matcher::has_predicated_vectors()); 1818 match(Set dst (SetVectMaskI src)); 1819 effect(TEMP dst); 1820 format %{ "setvectmask $dst, $src" %} 1821 ins_encode %{ 1822 __ setvectmask($dst$$Register, $src$$Register); 1823 %} 1824 ins_pipe(pipe_slow); 1825 %} 1826 1827 // ============================================================================ 1828 1829 instruct addF_reg(regF dst, regF src) %{ 1830 predicate((UseSSE>=1) && (UseAVX == 0)); 1831 match(Set dst (AddF dst src)); 1832 1833 format %{ "addss $dst, $src" %} 1834 ins_cost(150); 1835 ins_encode %{ 1836 __ addss($dst$$XMMRegister, $src$$XMMRegister); 1837 %} 1838 ins_pipe(pipe_slow); 1839 %} 1840 1841 instruct addF_mem(regF dst, memory src) %{ 1842 predicate((UseSSE>=1) && (UseAVX == 0)); 1843 match(Set dst (AddF dst (LoadF src))); 1844 1845 format %{ "addss $dst, $src" %} 1846 ins_cost(150); 1847 ins_encode %{ 1848 __ addss($dst$$XMMRegister, $src$$Address); 1849 %} 1850 ins_pipe(pipe_slow); 1851 %} 1852 1853 instruct addF_imm(regF dst, immF con) %{ 1854 predicate((UseSSE>=1) && (UseAVX == 0)); 1855 match(Set dst (AddF dst con)); 1856 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 1857 ins_cost(150); 1858 ins_encode %{ 1859 __ addss($dst$$XMMRegister, $constantaddress($con)); 1860 %} 1861 ins_pipe(pipe_slow); 1862 %} 1863 1864 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 1865 predicate(UseAVX > 0); 1866 match(Set dst (AddF src1 src2)); 1867 1868 format %{ "vaddss $dst, $src1, $src2" %} 1869 ins_cost(150); 1870 ins_encode %{ 1871 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1872 %} 1873 ins_pipe(pipe_slow); 1874 %} 1875 1876 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 1877 predicate(UseAVX > 0); 1878 match(Set dst (AddF src1 (LoadF src2))); 1879 1880 format %{ "vaddss $dst, $src1, $src2" %} 1881 ins_cost(150); 1882 ins_encode %{ 1883 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1884 %} 1885 ins_pipe(pipe_slow); 1886 %} 1887 1888 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 1889 predicate(UseAVX > 0); 1890 match(Set dst (AddF src con)); 1891 1892 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 1893 ins_cost(150); 1894 ins_encode %{ 1895 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1896 %} 1897 ins_pipe(pipe_slow); 1898 %} 1899 1900 instruct addD_reg(regD dst, regD src) %{ 1901 predicate((UseSSE>=2) && (UseAVX == 0)); 1902 match(Set dst (AddD dst src)); 1903 1904 format %{ "addsd $dst, $src" %} 1905 ins_cost(150); 1906 ins_encode %{ 1907 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 1908 %} 1909 ins_pipe(pipe_slow); 1910 %} 1911 1912 instruct addD_mem(regD dst, memory src) %{ 1913 predicate((UseSSE>=2) && (UseAVX == 0)); 1914 match(Set dst (AddD dst (LoadD src))); 1915 
1916 format %{ "addsd $dst, $src" %} 1917 ins_cost(150); 1918 ins_encode %{ 1919 __ addsd($dst$$XMMRegister, $src$$Address); 1920 %} 1921 ins_pipe(pipe_slow); 1922 %} 1923 1924 instruct addD_imm(regD dst, immD con) %{ 1925 predicate((UseSSE>=2) && (UseAVX == 0)); 1926 match(Set dst (AddD dst con)); 1927 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 1928 ins_cost(150); 1929 ins_encode %{ 1930 __ addsd($dst$$XMMRegister, $constantaddress($con)); 1931 %} 1932 ins_pipe(pipe_slow); 1933 %} 1934 1935 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 1936 predicate(UseAVX > 0); 1937 match(Set dst (AddD src1 src2)); 1938 1939 format %{ "vaddsd $dst, $src1, $src2" %} 1940 ins_cost(150); 1941 ins_encode %{ 1942 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1943 %} 1944 ins_pipe(pipe_slow); 1945 %} 1946 1947 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 1948 predicate(UseAVX > 0); 1949 match(Set dst (AddD src1 (LoadD src2))); 1950 1951 format %{ "vaddsd $dst, $src1, $src2" %} 1952 ins_cost(150); 1953 ins_encode %{ 1954 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1955 %} 1956 ins_pipe(pipe_slow); 1957 %} 1958 1959 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 1960 predicate(UseAVX > 0); 1961 match(Set dst (AddD src con)); 1962 1963 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 1964 ins_cost(150); 1965 ins_encode %{ 1966 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1967 %} 1968 ins_pipe(pipe_slow); 1969 %} 1970 1971 instruct subF_reg(regF dst, regF src) %{ 1972 predicate((UseSSE>=1) && (UseAVX == 0)); 1973 match(Set dst (SubF dst src)); 1974 1975 format %{ "subss $dst, $src" %} 1976 ins_cost(150); 1977 ins_encode %{ 1978 __ subss($dst$$XMMRegister, $src$$XMMRegister); 1979 %} 1980 ins_pipe(pipe_slow); 1981 %} 1982 1983 instruct subF_mem(regF dst, memory src) %{ 1984 predicate((UseSSE>=1) && (UseAVX == 0)); 1985 match(Set dst (SubF dst (LoadF src))); 1986 1987 format %{ "subss $dst, $src" %} 1988 ins_cost(150); 1989 ins_encode %{ 1990 __ subss($dst$$XMMRegister, $src$$Address); 1991 %} 1992 ins_pipe(pipe_slow); 1993 %} 1994 1995 instruct subF_imm(regF dst, immF con) %{ 1996 predicate((UseSSE>=1) && (UseAVX == 0)); 1997 match(Set dst (SubF dst con)); 1998 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 1999 ins_cost(150); 2000 ins_encode %{ 2001 __ subss($dst$$XMMRegister, $constantaddress($con)); 2002 %} 2003 ins_pipe(pipe_slow); 2004 %} 2005 2006 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 2007 predicate(UseAVX > 0); 2008 match(Set dst (SubF src1 src2)); 2009 2010 format %{ "vsubss $dst, $src1, $src2" %} 2011 ins_cost(150); 2012 ins_encode %{ 2013 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2014 %} 2015 ins_pipe(pipe_slow); 2016 %} 2017 2018 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 2019 predicate(UseAVX > 0); 2020 match(Set dst (SubF src1 (LoadF src2))); 2021 2022 format %{ "vsubss $dst, $src1, $src2" %} 2023 ins_cost(150); 2024 ins_encode %{ 2025 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2026 %} 2027 ins_pipe(pipe_slow); 2028 %} 2029 2030 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 2031 predicate(UseAVX > 0); 2032 match(Set dst (SubF src con)); 2033 2034 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2035 ins_cost(150); 2036 ins_encode %{ 
2037 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2038 %} 2039 ins_pipe(pipe_slow); 2040 %} 2041 2042 instruct subD_reg(regD dst, regD src) %{ 2043 predicate((UseSSE>=2) && (UseAVX == 0)); 2044 match(Set dst (SubD dst src)); 2045 2046 format %{ "subsd $dst, $src" %} 2047 ins_cost(150); 2048 ins_encode %{ 2049 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 2050 %} 2051 ins_pipe(pipe_slow); 2052 %} 2053 2054 instruct subD_mem(regD dst, memory src) %{ 2055 predicate((UseSSE>=2) && (UseAVX == 0)); 2056 match(Set dst (SubD dst (LoadD src))); 2057 2058 format %{ "subsd $dst, $src" %} 2059 ins_cost(150); 2060 ins_encode %{ 2061 __ subsd($dst$$XMMRegister, $src$$Address); 2062 %} 2063 ins_pipe(pipe_slow); 2064 %} 2065 2066 instruct subD_imm(regD dst, immD con) %{ 2067 predicate((UseSSE>=2) && (UseAVX == 0)); 2068 match(Set dst (SubD dst con)); 2069 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2070 ins_cost(150); 2071 ins_encode %{ 2072 __ subsd($dst$$XMMRegister, $constantaddress($con)); 2073 %} 2074 ins_pipe(pipe_slow); 2075 %} 2076 2077 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 2078 predicate(UseAVX > 0); 2079 match(Set dst (SubD src1 src2)); 2080 2081 format %{ "vsubsd $dst, $src1, $src2" %} 2082 ins_cost(150); 2083 ins_encode %{ 2084 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2085 %} 2086 ins_pipe(pipe_slow); 2087 %} 2088 2089 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 2090 predicate(UseAVX > 0); 2091 match(Set dst (SubD src1 (LoadD src2))); 2092 2093 format %{ "vsubsd $dst, $src1, $src2" %} 2094 ins_cost(150); 2095 ins_encode %{ 2096 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2097 %} 2098 ins_pipe(pipe_slow); 2099 %} 2100 2101 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 2102 predicate(UseAVX > 0); 2103 match(Set dst (SubD src con)); 2104 2105 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2106 ins_cost(150); 2107 ins_encode %{ 2108 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2109 %} 2110 ins_pipe(pipe_slow); 2111 %} 2112 2113 instruct mulF_reg(regF dst, regF src) %{ 2114 predicate((UseSSE>=1) && (UseAVX == 0)); 2115 match(Set dst (MulF dst src)); 2116 2117 format %{ "mulss $dst, $src" %} 2118 ins_cost(150); 2119 ins_encode %{ 2120 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 2121 %} 2122 ins_pipe(pipe_slow); 2123 %} 2124 2125 instruct mulF_mem(regF dst, memory src) %{ 2126 predicate((UseSSE>=1) && (UseAVX == 0)); 2127 match(Set dst (MulF dst (LoadF src))); 2128 2129 format %{ "mulss $dst, $src" %} 2130 ins_cost(150); 2131 ins_encode %{ 2132 __ mulss($dst$$XMMRegister, $src$$Address); 2133 %} 2134 ins_pipe(pipe_slow); 2135 %} 2136 2137 instruct mulF_imm(regF dst, immF con) %{ 2138 predicate((UseSSE>=1) && (UseAVX == 0)); 2139 match(Set dst (MulF dst con)); 2140 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2141 ins_cost(150); 2142 ins_encode %{ 2143 __ mulss($dst$$XMMRegister, $constantaddress($con)); 2144 %} 2145 ins_pipe(pipe_slow); 2146 %} 2147 2148 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 2149 predicate(UseAVX > 0); 2150 match(Set dst (MulF src1 src2)); 2151 2152 format %{ "vmulss $dst, $src1, $src2" %} 2153 ins_cost(150); 2154 ins_encode %{ 2155 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2156 %} 2157 ins_pipe(pipe_slow); 2158 %} 2159 2160 instruct mulF_reg_mem(regF dst, 
regF src1, memory src2) %{ 2161 predicate(UseAVX > 0); 2162 match(Set dst (MulF src1 (LoadF src2))); 2163 2164 format %{ "vmulss $dst, $src1, $src2" %} 2165 ins_cost(150); 2166 ins_encode %{ 2167 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2168 %} 2169 ins_pipe(pipe_slow); 2170 %} 2171 2172 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 2173 predicate(UseAVX > 0); 2174 match(Set dst (MulF src con)); 2175 2176 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2177 ins_cost(150); 2178 ins_encode %{ 2179 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2180 %} 2181 ins_pipe(pipe_slow); 2182 %} 2183 2184 instruct mulD_reg(regD dst, regD src) %{ 2185 predicate((UseSSE>=2) && (UseAVX == 0)); 2186 match(Set dst (MulD dst src)); 2187 2188 format %{ "mulsd $dst, $src" %} 2189 ins_cost(150); 2190 ins_encode %{ 2191 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 2192 %} 2193 ins_pipe(pipe_slow); 2194 %} 2195 2196 instruct mulD_mem(regD dst, memory src) %{ 2197 predicate((UseSSE>=2) && (UseAVX == 0)); 2198 match(Set dst (MulD dst (LoadD src))); 2199 2200 format %{ "mulsd $dst, $src" %} 2201 ins_cost(150); 2202 ins_encode %{ 2203 __ mulsd($dst$$XMMRegister, $src$$Address); 2204 %} 2205 ins_pipe(pipe_slow); 2206 %} 2207 2208 instruct mulD_imm(regD dst, immD con) %{ 2209 predicate((UseSSE>=2) && (UseAVX == 0)); 2210 match(Set dst (MulD dst con)); 2211 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2212 ins_cost(150); 2213 ins_encode %{ 2214 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 2215 %} 2216 ins_pipe(pipe_slow); 2217 %} 2218 2219 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 2220 predicate(UseAVX > 0); 2221 match(Set dst (MulD src1 src2)); 2222 2223 format %{ "vmulsd $dst, $src1, $src2" %} 2224 ins_cost(150); 2225 ins_encode %{ 2226 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2227 %} 2228 ins_pipe(pipe_slow); 2229 %} 2230 2231 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 2232 predicate(UseAVX > 0); 2233 match(Set dst (MulD src1 (LoadD src2))); 2234 2235 format %{ "vmulsd $dst, $src1, $src2" %} 2236 ins_cost(150); 2237 ins_encode %{ 2238 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2239 %} 2240 ins_pipe(pipe_slow); 2241 %} 2242 2243 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 2244 predicate(UseAVX > 0); 2245 match(Set dst (MulD src con)); 2246 2247 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2248 ins_cost(150); 2249 ins_encode %{ 2250 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2251 %} 2252 ins_pipe(pipe_slow); 2253 %} 2254 2255 instruct divF_reg(regF dst, regF src) %{ 2256 predicate((UseSSE>=1) && (UseAVX == 0)); 2257 match(Set dst (DivF dst src)); 2258 2259 format %{ "divss $dst, $src" %} 2260 ins_cost(150); 2261 ins_encode %{ 2262 __ divss($dst$$XMMRegister, $src$$XMMRegister); 2263 %} 2264 ins_pipe(pipe_slow); 2265 %} 2266 2267 instruct divF_mem(regF dst, memory src) %{ 2268 predicate((UseSSE>=1) && (UseAVX == 0)); 2269 match(Set dst (DivF dst (LoadF src))); 2270 2271 format %{ "divss $dst, $src" %} 2272 ins_cost(150); 2273 ins_encode %{ 2274 __ divss($dst$$XMMRegister, $src$$Address); 2275 %} 2276 ins_pipe(pipe_slow); 2277 %} 2278 2279 instruct divF_imm(regF dst, immF con) %{ 2280 predicate((UseSSE>=1) && (UseAVX == 0)); 2281 match(Set dst (DivF dst con)); 2282 format %{ "divss $dst, 
[$constantaddress]\t# load from constant table: float=$con" %} 2283 ins_cost(150); 2284 ins_encode %{ 2285 __ divss($dst$$XMMRegister, $constantaddress($con)); 2286 %} 2287 ins_pipe(pipe_slow); 2288 %} 2289 2290 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 2291 predicate(UseAVX > 0); 2292 match(Set dst (DivF src1 src2)); 2293 2294 format %{ "vdivss $dst, $src1, $src2" %} 2295 ins_cost(150); 2296 ins_encode %{ 2297 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2298 %} 2299 ins_pipe(pipe_slow); 2300 %} 2301 2302 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 2303 predicate(UseAVX > 0); 2304 match(Set dst (DivF src1 (LoadF src2))); 2305 2306 format %{ "vdivss $dst, $src1, $src2" %} 2307 ins_cost(150); 2308 ins_encode %{ 2309 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2310 %} 2311 ins_pipe(pipe_slow); 2312 %} 2313 2314 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 2315 predicate(UseAVX > 0); 2316 match(Set dst (DivF src con)); 2317 2318 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2319 ins_cost(150); 2320 ins_encode %{ 2321 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2322 %} 2323 ins_pipe(pipe_slow); 2324 %} 2325 2326 instruct divD_reg(regD dst, regD src) %{ 2327 predicate((UseSSE>=2) && (UseAVX == 0)); 2328 match(Set dst (DivD dst src)); 2329 2330 format %{ "divsd $dst, $src" %} 2331 ins_cost(150); 2332 ins_encode %{ 2333 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 2334 %} 2335 ins_pipe(pipe_slow); 2336 %} 2337 2338 instruct divD_mem(regD dst, memory src) %{ 2339 predicate((UseSSE>=2) && (UseAVX == 0)); 2340 match(Set dst (DivD dst (LoadD src))); 2341 2342 format %{ "divsd $dst, $src" %} 2343 ins_cost(150); 2344 ins_encode %{ 2345 __ divsd($dst$$XMMRegister, $src$$Address); 2346 %} 2347 ins_pipe(pipe_slow); 2348 %} 2349 2350 instruct divD_imm(regD dst, immD con) %{ 2351 predicate((UseSSE>=2) && (UseAVX == 0)); 2352 match(Set dst (DivD dst con)); 2353 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2354 ins_cost(150); 2355 ins_encode %{ 2356 __ divsd($dst$$XMMRegister, $constantaddress($con)); 2357 %} 2358 ins_pipe(pipe_slow); 2359 %} 2360 2361 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 2362 predicate(UseAVX > 0); 2363 match(Set dst (DivD src1 src2)); 2364 2365 format %{ "vdivsd $dst, $src1, $src2" %} 2366 ins_cost(150); 2367 ins_encode %{ 2368 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2369 %} 2370 ins_pipe(pipe_slow); 2371 %} 2372 2373 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 2374 predicate(UseAVX > 0); 2375 match(Set dst (DivD src1 (LoadD src2))); 2376 2377 format %{ "vdivsd $dst, $src1, $src2" %} 2378 ins_cost(150); 2379 ins_encode %{ 2380 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2381 %} 2382 ins_pipe(pipe_slow); 2383 %} 2384 2385 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 2386 predicate(UseAVX > 0); 2387 match(Set dst (DivD src con)); 2388 2389 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2390 ins_cost(150); 2391 ins_encode %{ 2392 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2393 %} 2394 ins_pipe(pipe_slow); 2395 %} 2396 2397 instruct absF_reg(regF dst) %{ 2398 predicate((UseSSE>=1) && (UseAVX == 0)); 2399 match(Set dst (AbsF dst)); 2400 ins_cost(150); 2401 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" 
%}
  ins_encode %{
    __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absF_reg_reg(regF dst, regF src) %{
  predicate(VM_Version::supports_avxonly());
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

#ifdef _LP64
instruct absF_reg_reg_evex(regF dst, regF src) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

instruct absF_reg_reg_evex_special(regF dst, regF src1, regF src2) %{
  predicate(VM_Version::supports_avx512novl());
  match(Set dst (AbsF src1));
  effect(TEMP src2);
  ins_cost(150);
  format %{ "vabsss $dst, $src1, $src2, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vabsss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
#else // _LP64
instruct absF_reg_reg_evex(regF dst, regF src) %{
  predicate(UseAVX > 2);
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
#endif
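// Note: the and-masks (0x7fffffff / 0x7fffffffffffffff) clear the IEEE-754
// sign bit, giving absolute value, while the xor-masks used below
// (0x80000000 / 0x8000000000000000) flip it, giving negation; the constants
// come from float_signmask(), float_signflip() and friends defined above.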
instruct absD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AbsD dst));
  ins_cost(150);
  format %{ "andpd $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg_reg(regD dst, regD src) %{
  predicate(VM_Version::supports_avxonly());
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

#ifdef _LP64
instruct absD_reg_reg_evex(regD dst, regD src) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg_reg_evex_special(regD dst, regD src1, regD src2) %{
  predicate(VM_Version::supports_avx512novl());
  match(Set dst (AbsD src1));
  effect(TEMP src2);
  ins_cost(150);
  format %{ "vabssd $dst, $src1, $src2, [0x7fffffffffffffff]\t# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vabssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
#else // _LP64
instruct absD_reg_reg_evex(regD dst, regD src) %{
  predicate(UseAVX > 2);
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
#endif

instruct negF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (NegF dst));
  ins_cost(150);
  format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negF_reg_reg(regF dst, regF src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegF src));
  ins_cost(150);
  format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (NegD dst));
  ins_cost(150);
  format %{ "xorpd $dst, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg_reg(regD dst, regD src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegD src));
  ins_cost(150);
  format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_reg(regF dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_mem(regF dst, memory src) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_imm(regF dst, immF con) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D con))));
  format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_reg(regD dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD src));

  format %{ "sqrtsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_mem(regD dst, memory src) %{
  predicate(UseSSE>=2);
  match(Set dst
(SqrtD (LoadD src))); 2631 2632 format %{ "sqrtsd $dst, $src" %} 2633 ins_cost(150); 2634 ins_encode %{ 2635 __ sqrtsd($dst$$XMMRegister, $src$$Address); 2636 %} 2637 ins_pipe(pipe_slow); 2638 %} 2639 2640 instruct sqrtD_imm(regD dst, immD con) %{ 2641 predicate(UseSSE>=2); 2642 match(Set dst (SqrtD con)); 2643 format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2644 ins_cost(150); 2645 ins_encode %{ 2646 __ sqrtsd($dst$$XMMRegister, $constantaddress($con)); 2647 %} 2648 ins_pipe(pipe_slow); 2649 %} 2650 2651 instruct onspinwait() %{ 2652 match(OnSpinWait); 2653 ins_cost(200); 2654 2655 format %{ 2656 $$template 2657 if (os::is_MP()) { 2658 $$emit$$"pause\t! membar_onspinwait" 2659 } else { 2660 $$emit$$"MEMBAR-onspinwait ! (empty encoding)" 2661 } 2662 %} 2663 ins_encode %{ 2664 __ pause(); 2665 %} 2666 ins_pipe(pipe_slow); 2667 %} 2668 2669 // ====================VECTOR INSTRUCTIONS===================================== 2670 2671 // Load vectors (4 bytes long) 2672 instruct loadV4(vecS dst, memory mem) %{ 2673 predicate(n->as_LoadVector()->memory_size() == 4); 2674 match(Set dst (LoadVector mem)); 2675 ins_cost(125); 2676 format %{ "movd $dst,$mem\t! load vector (4 bytes)" %} 2677 ins_encode %{ 2678 __ movdl($dst$$XMMRegister, $mem$$Address); 2679 %} 2680 ins_pipe( pipe_slow ); 2681 %} 2682 2683 // Load vectors (8 bytes long) 2684 instruct loadV8(vecD dst, memory mem) %{ 2685 predicate(n->as_LoadVector()->memory_size() == 8); 2686 match(Set dst (LoadVector mem)); 2687 ins_cost(125); 2688 format %{ "movq $dst,$mem\t! load vector (8 bytes)" %} 2689 ins_encode %{ 2690 __ movq($dst$$XMMRegister, $mem$$Address); 2691 %} 2692 ins_pipe( pipe_slow ); 2693 %} 2694 2695 // Load vectors (16 bytes long) 2696 instruct loadV16(vecX dst, memory mem) %{ 2697 predicate(n->as_LoadVector()->memory_size() == 16); 2698 match(Set dst (LoadVector mem)); 2699 ins_cost(125); 2700 format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %} 2701 ins_encode %{ 2702 __ movdqu($dst$$XMMRegister, $mem$$Address); 2703 %} 2704 ins_pipe( pipe_slow ); 2705 %} 2706 2707 // Load vectors (32 bytes long) 2708 instruct loadV32(vecY dst, memory mem) %{ 2709 predicate(n->as_LoadVector()->memory_size() == 32); 2710 match(Set dst (LoadVector mem)); 2711 ins_cost(125); 2712 format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %} 2713 ins_encode %{ 2714 __ vmovdqu($dst$$XMMRegister, $mem$$Address); 2715 %} 2716 ins_pipe( pipe_slow ); 2717 %} 2718 2719 // Load vectors (64 bytes long) 2720 instruct loadV64_dword(vecZ dst, memory mem) %{ 2721 predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() <= 4); 2722 match(Set dst (LoadVector mem)); 2723 ins_cost(125); 2724 format %{ "vmovdqul $dst k0,$mem\t! load vector (64 bytes)" %} 2725 ins_encode %{ 2726 int vector_len = 2; 2727 __ evmovdqul($dst$$XMMRegister, $mem$$Address, vector_len); 2728 %} 2729 ins_pipe( pipe_slow ); 2730 %} 2731 2732 // Load vectors (64 bytes long) 2733 instruct loadV64_qword(vecZ dst, memory mem) %{ 2734 predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() > 4); 2735 match(Set dst (LoadVector mem)); 2736 ins_cost(125); 2737 format %{ "vmovdquq $dst k0,$mem\t! 
load vector (64 bytes)" %} 2738 ins_encode %{ 2739 int vector_len = 2; 2740 __ evmovdquq($dst$$XMMRegister, $mem$$Address, vector_len); 2741 %} 2742 ins_pipe( pipe_slow ); 2743 %} 2744 2745 // Store vectors 2746 instruct storeV4(memory mem, vecS src) %{ 2747 predicate(n->as_StoreVector()->memory_size() == 4); 2748 match(Set mem (StoreVector mem src)); 2749 ins_cost(145); 2750 format %{ "movd $mem,$src\t! store vector (4 bytes)" %} 2751 ins_encode %{ 2752 __ movdl($mem$$Address, $src$$XMMRegister); 2753 %} 2754 ins_pipe( pipe_slow ); 2755 %} 2756 2757 instruct storeV8(memory mem, vecD src) %{ 2758 predicate(n->as_StoreVector()->memory_size() == 8); 2759 match(Set mem (StoreVector mem src)); 2760 ins_cost(145); 2761 format %{ "movq $mem,$src\t! store vector (8 bytes)" %} 2762 ins_encode %{ 2763 __ movq($mem$$Address, $src$$XMMRegister); 2764 %} 2765 ins_pipe( pipe_slow ); 2766 %} 2767 2768 instruct storeV16(memory mem, vecX src) %{ 2769 predicate(n->as_StoreVector()->memory_size() == 16); 2770 match(Set mem (StoreVector mem src)); 2771 ins_cost(145); 2772 format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %} 2773 ins_encode %{ 2774 __ movdqu($mem$$Address, $src$$XMMRegister); 2775 %} 2776 ins_pipe( pipe_slow ); 2777 %} 2778 2779 instruct storeV32(memory mem, vecY src) %{ 2780 predicate(n->as_StoreVector()->memory_size() == 32); 2781 match(Set mem (StoreVector mem src)); 2782 ins_cost(145); 2783 format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %} 2784 ins_encode %{ 2785 __ vmovdqu($mem$$Address, $src$$XMMRegister); 2786 %} 2787 ins_pipe( pipe_slow ); 2788 %} 2789 2790 instruct storeV64_dword(memory mem, vecZ src) %{ 2791 predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() <= 4); 2792 match(Set mem (StoreVector mem src)); 2793 ins_cost(145); 2794 format %{ "vmovdqul $mem k0,$src\t! store vector (64 bytes)" %} 2795 ins_encode %{ 2796 int vector_len = 2; 2797 __ evmovdqul($mem$$Address, $src$$XMMRegister, vector_len); 2798 %} 2799 ins_pipe( pipe_slow ); 2800 %} 2801 2802 instruct storeV64_qword(memory mem, vecZ src) %{ 2803 predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() > 4); 2804 match(Set mem (StoreVector mem src)); 2805 ins_cost(145); 2806 format %{ "vmovdquq $mem k0,$src\t! store vector (64 bytes)" %} 2807 ins_encode %{ 2808 int vector_len = 2; 2809 __ evmovdquq($mem$$Address, $src$$XMMRegister, vector_len); 2810 %} 2811 ins_pipe( pipe_slow ); 2812 %} 2813 2814 // ====================LEGACY REPLICATE======================================= 2815 2816 instruct Repl4B_mem(vecS dst, memory mem) %{ 2817 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 2818 match(Set dst (ReplicateB (LoadB mem))); 2819 format %{ "punpcklbw $dst,$mem\n\t" 2820 "pshuflw $dst,$dst,0x00\t! replicate4B" %} 2821 ins_encode %{ 2822 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 2823 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2824 %} 2825 ins_pipe( pipe_slow ); 2826 %} 2827 2828 instruct Repl8B_mem(vecD dst, memory mem) %{ 2829 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 2830 match(Set dst (ReplicateB (LoadB mem))); 2831 format %{ "punpcklbw $dst,$mem\n\t" 2832 "pshuflw $dst,$dst,0x00\t! 
replicate8B" %} 2833 ins_encode %{ 2834 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 2835 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2836 %} 2837 ins_pipe( pipe_slow ); 2838 %} 2839 2840 instruct Repl16B(vecX dst, rRegI src) %{ 2841 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 2842 match(Set dst (ReplicateB src)); 2843 format %{ "movd $dst,$src\n\t" 2844 "punpcklbw $dst,$dst\n\t" 2845 "pshuflw $dst,$dst,0x00\n\t" 2846 "punpcklqdq $dst,$dst\t! replicate16B" %} 2847 ins_encode %{ 2848 __ movdl($dst$$XMMRegister, $src$$Register); 2849 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 2850 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2851 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2852 %} 2853 ins_pipe( pipe_slow ); 2854 %} 2855 2856 instruct Repl16B_mem(vecX dst, memory mem) %{ 2857 predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 2858 match(Set dst (ReplicateB (LoadB mem))); 2859 format %{ "punpcklbw $dst,$mem\n\t" 2860 "pshuflw $dst,$dst,0x00\n\t" 2861 "punpcklqdq $dst,$dst\t! replicate16B" %} 2862 ins_encode %{ 2863 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 2864 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2865 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2866 %} 2867 ins_pipe( pipe_slow ); 2868 %} 2869 2870 instruct Repl32B(vecY dst, rRegI src) %{ 2871 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 2872 match(Set dst (ReplicateB src)); 2873 format %{ "movd $dst,$src\n\t" 2874 "punpcklbw $dst,$dst\n\t" 2875 "pshuflw $dst,$dst,0x00\n\t" 2876 "punpcklqdq $dst,$dst\n\t" 2877 "vinserti128_high $dst,$dst\t! replicate32B" %} 2878 ins_encode %{ 2879 __ movdl($dst$$XMMRegister, $src$$Register); 2880 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 2881 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2882 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2883 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 2884 %} 2885 ins_pipe( pipe_slow ); 2886 %} 2887 2888 instruct Repl32B_mem(vecY dst, memory mem) %{ 2889 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 2890 match(Set dst (ReplicateB (LoadB mem))); 2891 format %{ "punpcklbw $dst,$mem\n\t" 2892 "pshuflw $dst,$dst,0x00\n\t" 2893 "punpcklqdq $dst,$dst\n\t" 2894 "vinserti128_high $dst,$dst\t! replicate32B" %} 2895 ins_encode %{ 2896 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 2897 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2898 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2899 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 2900 %} 2901 ins_pipe( pipe_slow ); 2902 %} 2903 2904 instruct Repl16B_imm(vecX dst, immI con) %{ 2905 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 2906 match(Set dst (ReplicateB con)); 2907 format %{ "movq $dst,[$constantaddress]\n\t" 2908 "punpcklqdq $dst,$dst\t! replicate16B($con)" %} 2909 ins_encode %{ 2910 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 2911 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2912 %} 2913 ins_pipe( pipe_slow ); 2914 %} 2915 2916 instruct Repl32B_imm(vecY dst, immI con) %{ 2917 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 2918 match(Set dst (ReplicateB con)); 2919 format %{ "movq $dst,[$constantaddress]\n\t" 2920 "punpcklqdq $dst,$dst\n\t" 2921 "vinserti128_high $dst,$dst\t! 
instruct Repl4S(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4S_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "pshuflw $dst,$mem,0x00\t! replicate4S" %}
  ins_encode %{
    __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\t! replicate8S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "pshuflw $dst,$mem,0x00\n\t"
            "punpcklqdq $dst,$dst\t! replicate8S" %}
  ins_encode %{
    __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate8S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate16S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "pshuflw $dst,$mem,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t!
replicate16S" %} 3012 ins_encode %{ 3013 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3014 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3015 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3016 %} 3017 ins_pipe( pipe_slow ); 3018 %} 3019 3020 instruct Repl16S_imm(vecY dst, immI con) %{ 3021 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3022 match(Set dst (ReplicateS con)); 3023 format %{ "movq $dst,[$constantaddress]\n\t" 3024 "punpcklqdq $dst,$dst\n\t" 3025 "vinserti128_high $dst,$dst\t! replicate16S($con)" %} 3026 ins_encode %{ 3027 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3028 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3029 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3030 %} 3031 ins_pipe( pipe_slow ); 3032 %} 3033 3034 instruct Repl4I(vecX dst, rRegI src) %{ 3035 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3036 match(Set dst (ReplicateI src)); 3037 format %{ "movd $dst,$src\n\t" 3038 "pshufd $dst,$dst,0x00\t! replicate4I" %} 3039 ins_encode %{ 3040 __ movdl($dst$$XMMRegister, $src$$Register); 3041 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3042 %} 3043 ins_pipe( pipe_slow ); 3044 %} 3045 3046 instruct Repl4I_mem(vecX dst, memory mem) %{ 3047 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3048 match(Set dst (ReplicateI (LoadI mem))); 3049 format %{ "pshufd $dst,$mem,0x00\t! replicate4I" %} 3050 ins_encode %{ 3051 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3052 %} 3053 ins_pipe( pipe_slow ); 3054 %} 3055 3056 instruct Repl8I(vecY dst, rRegI src) %{ 3057 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3058 match(Set dst (ReplicateI src)); 3059 format %{ "movd $dst,$src\n\t" 3060 "pshufd $dst,$dst,0x00\n\t" 3061 "vinserti128_high $dst,$dst\t! replicate8I" %} 3062 ins_encode %{ 3063 __ movdl($dst$$XMMRegister, $src$$Register); 3064 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3065 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3066 %} 3067 ins_pipe( pipe_slow ); 3068 %} 3069 3070 instruct Repl8I_mem(vecY dst, memory mem) %{ 3071 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3072 match(Set dst (ReplicateI (LoadI mem))); 3073 format %{ "pshufd $dst,$mem,0x00\n\t" 3074 "vinserti128_high $dst,$dst\t! replicate8I" %} 3075 ins_encode %{ 3076 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3077 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3078 %} 3079 ins_pipe( pipe_slow ); 3080 %} 3081 3082 instruct Repl4I_imm(vecX dst, immI con) %{ 3083 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3084 match(Set dst (ReplicateI con)); 3085 format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t" 3086 "punpcklqdq $dst,$dst" %} 3087 ins_encode %{ 3088 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3089 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3090 %} 3091 ins_pipe( pipe_slow ); 3092 %} 3093 3094 instruct Repl8I_imm(vecY dst, immI con) %{ 3095 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3096 match(Set dst (ReplicateI con)); 3097 format %{ "movq $dst,[$constantaddress]\t! 
replicate8I($con)\n\t" 3098 "punpcklqdq $dst,$dst\n\t" 3099 "vinserti128_high $dst,$dst" %} 3100 ins_encode %{ 3101 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3102 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3103 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3104 %} 3105 ins_pipe( pipe_slow ); 3106 %} 3107 3108 // Long could be loaded into xmm register directly from memory. 3109 instruct Repl2L_mem(vecX dst, memory mem) %{ 3110 predicate(n->as_Vector()->length() == 2 && !VM_Version::supports_avx512vlbw()); 3111 match(Set dst (ReplicateL (LoadL mem))); 3112 format %{ "movq $dst,$mem\n\t" 3113 "punpcklqdq $dst,$dst\t! replicate2L" %} 3114 ins_encode %{ 3115 __ movq($dst$$XMMRegister, $mem$$Address); 3116 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3117 %} 3118 ins_pipe( pipe_slow ); 3119 %} 3120 3121 // Replicate long (8 byte) scalar to be vector 3122 #ifdef _LP64 3123 instruct Repl4L(vecY dst, rRegL src) %{ 3124 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3125 match(Set dst (ReplicateL src)); 3126 format %{ "movdq $dst,$src\n\t" 3127 "punpcklqdq $dst,$dst\n\t" 3128 "vinserti128_high $dst,$dst\t! replicate4L" %} 3129 ins_encode %{ 3130 __ movdq($dst$$XMMRegister, $src$$Register); 3131 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3132 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3133 %} 3134 ins_pipe( pipe_slow ); 3135 %} 3136 #else // _LP64 3137 instruct Repl4L(vecY dst, eRegL src, regD tmp) %{ 3138 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3139 match(Set dst (ReplicateL src)); 3140 effect(TEMP dst, USE src, TEMP tmp); 3141 format %{ "movdl $dst,$src.lo\n\t" 3142 "movdl $tmp,$src.hi\n\t" 3143 "punpckldq $dst,$tmp\n\t" 3144 "punpcklqdq $dst,$dst\n\t" 3145 "vinserti128_high $dst,$dst\t! replicate4L" %} 3146 ins_encode %{ 3147 __ movdl($dst$$XMMRegister, $src$$Register); 3148 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3149 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3150 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3151 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3152 %} 3153 ins_pipe( pipe_slow ); 3154 %} 3155 #endif // _LP64 3156 3157 instruct Repl4L_imm(vecY dst, immL con) %{ 3158 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3159 match(Set dst (ReplicateL con)); 3160 format %{ "movq $dst,[$constantaddress]\n\t" 3161 "punpcklqdq $dst,$dst\n\t" 3162 "vinserti128_high $dst,$dst\t! replicate4L($con)" %} 3163 ins_encode %{ 3164 __ movq($dst$$XMMRegister, $constantaddress($con)); 3165 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3166 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3167 %} 3168 ins_pipe( pipe_slow ); 3169 %} 3170 3171 instruct Repl4L_mem(vecY dst, memory mem) %{ 3172 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3173 match(Set dst (ReplicateL (LoadL mem))); 3174 format %{ "movq $dst,$mem\n\t" 3175 "punpcklqdq $dst,$dst\n\t" 3176 "vinserti128_high $dst,$dst\t! 
replicate4L" %} 3177 ins_encode %{ 3178 __ movq($dst$$XMMRegister, $mem$$Address); 3179 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3180 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3181 %} 3182 ins_pipe( pipe_slow ); 3183 %} 3184 3185 instruct Repl2F_mem(vecD dst, memory mem) %{ 3186 predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3187 match(Set dst (ReplicateF (LoadF mem))); 3188 format %{ "pshufd $dst,$mem,0x00\t! replicate2F" %} 3189 ins_encode %{ 3190 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3191 %} 3192 ins_pipe( pipe_slow ); 3193 %} 3194 3195 instruct Repl4F_mem(vecX dst, memory mem) %{ 3196 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3197 match(Set dst (ReplicateF (LoadF mem))); 3198 format %{ "pshufd $dst,$mem,0x00\t! replicate4F" %} 3199 ins_encode %{ 3200 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3201 %} 3202 ins_pipe( pipe_slow ); 3203 %} 3204 3205 instruct Repl8F(vecY dst, regF src) %{ 3206 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3207 match(Set dst (ReplicateF src)); 3208 format %{ "pshufd $dst,$src,0x00\n\t" 3209 "vinsertf128_high $dst,$dst\t! replicate8F" %} 3210 ins_encode %{ 3211 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3212 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3213 %} 3214 ins_pipe( pipe_slow ); 3215 %} 3216 3217 instruct Repl8F_mem(vecY dst, memory mem) %{ 3218 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3219 match(Set dst (ReplicateF (LoadF mem))); 3220 format %{ "pshufd $dst,$mem,0x00\n\t" 3221 "vinsertf128_high $dst,$dst\t! replicate8F" %} 3222 ins_encode %{ 3223 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3224 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3225 %} 3226 ins_pipe( pipe_slow ); 3227 %} 3228 3229 instruct Repl2F_zero(vecD dst, immF0 zero) %{ 3230 predicate(n->as_Vector()->length() == 2 && UseAVX < 3); 3231 match(Set dst (ReplicateF zero)); 3232 format %{ "xorps $dst,$dst\t! replicate2F zero" %} 3233 ins_encode %{ 3234 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 3235 %} 3236 ins_pipe( fpu_reg_reg ); 3237 %} 3238 3239 instruct Repl4F_zero(vecX dst, immF0 zero) %{ 3240 predicate(n->as_Vector()->length() == 4 && UseAVX < 3); 3241 match(Set dst (ReplicateF zero)); 3242 format %{ "xorps $dst,$dst\t! replicate4F zero" %} 3243 ins_encode %{ 3244 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 3245 %} 3246 ins_pipe( fpu_reg_reg ); 3247 %} 3248 3249 instruct Repl8F_zero(vecY dst, immF0 zero) %{ 3250 predicate(n->as_Vector()->length() == 8 && UseAVX < 3); 3251 match(Set dst (ReplicateF zero)); 3252 format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %} 3253 ins_encode %{ 3254 int vector_len = 1; 3255 __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3256 %} 3257 ins_pipe( fpu_reg_reg ); 3258 %} 3259 3260 instruct Repl2D_mem(vecX dst, memory mem) %{ 3261 predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3262 match(Set dst (ReplicateD (LoadD mem))); 3263 format %{ "pshufd $dst,$mem,0x44\t! 
replicate2D" %} 3264 ins_encode %{ 3265 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3266 %} 3267 ins_pipe( pipe_slow ); 3268 %} 3269 3270 instruct Repl4D(vecY dst, regD src) %{ 3271 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3272 match(Set dst (ReplicateD src)); 3273 format %{ "pshufd $dst,$src,0x44\n\t" 3274 "vinsertf128_high $dst,$dst\t! replicate4D" %} 3275 ins_encode %{ 3276 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3277 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3278 %} 3279 ins_pipe( pipe_slow ); 3280 %} 3281 3282 instruct Repl4D_mem(vecY dst, memory mem) %{ 3283 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3284 match(Set dst (ReplicateD (LoadD mem))); 3285 format %{ "pshufd $dst,$mem,0x44\n\t" 3286 "vinsertf128_high $dst,$dst\t! replicate4D" %} 3287 ins_encode %{ 3288 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3289 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3290 %} 3291 ins_pipe( pipe_slow ); 3292 %} 3293 3294 // Replicate double (8 byte) scalar zero to be vector 3295 instruct Repl2D_zero(vecX dst, immD0 zero) %{ 3296 predicate(n->as_Vector()->length() == 2 && UseAVX < 3); 3297 match(Set dst (ReplicateD zero)); 3298 format %{ "xorpd $dst,$dst\t! replicate2D zero" %} 3299 ins_encode %{ 3300 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister); 3301 %} 3302 ins_pipe( fpu_reg_reg ); 3303 %} 3304 3305 instruct Repl4D_zero(vecY dst, immD0 zero) %{ 3306 predicate(n->as_Vector()->length() == 4 && UseAVX < 3); 3307 match(Set dst (ReplicateD zero)); 3308 format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %} 3309 ins_encode %{ 3310 int vector_len = 1; 3311 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3312 %} 3313 ins_pipe( fpu_reg_reg ); 3314 %} 3315 3316 // ====================GENERIC REPLICATE========================================== 3317 3318 // Replicate byte scalar to be vector 3319 instruct Repl4B(vecS dst, rRegI src) %{ 3320 predicate(n->as_Vector()->length() == 4); 3321 match(Set dst (ReplicateB src)); 3322 format %{ "movd $dst,$src\n\t" 3323 "punpcklbw $dst,$dst\n\t" 3324 "pshuflw $dst,$dst,0x00\t! replicate4B" %} 3325 ins_encode %{ 3326 __ movdl($dst$$XMMRegister, $src$$Register); 3327 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3328 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3329 %} 3330 ins_pipe( pipe_slow ); 3331 %} 3332 3333 instruct Repl8B(vecD dst, rRegI src) %{ 3334 predicate(n->as_Vector()->length() == 8); 3335 match(Set dst (ReplicateB src)); 3336 format %{ "movd $dst,$src\n\t" 3337 "punpcklbw $dst,$dst\n\t" 3338 "pshuflw $dst,$dst,0x00\t! replicate8B" %} 3339 ins_encode %{ 3340 __ movdl($dst$$XMMRegister, $src$$Register); 3341 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3342 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3343 %} 3344 ins_pipe( pipe_slow ); 3345 %} 3346 3347 // Replicate byte scalar immediate to be vector by loading from const table. 3348 instruct Repl4B_imm(vecS dst, immI con) %{ 3349 predicate(n->as_Vector()->length() == 4); 3350 match(Set dst (ReplicateB con)); 3351 format %{ "movdl $dst,[$constantaddress]\t! 
replicate4B($con)" %} 3352 ins_encode %{ 3353 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1))); 3354 %} 3355 ins_pipe( pipe_slow ); 3356 %} 3357 3358 instruct Repl8B_imm(vecD dst, immI con) %{ 3359 predicate(n->as_Vector()->length() == 8); 3360 match(Set dst (ReplicateB con)); 3361 format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %} 3362 ins_encode %{ 3363 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3364 %} 3365 ins_pipe( pipe_slow ); 3366 %} 3367 3368 // Replicate byte scalar zero to be vector 3369 instruct Repl4B_zero(vecS dst, immI0 zero) %{ 3370 predicate(n->as_Vector()->length() == 4); 3371 match(Set dst (ReplicateB zero)); 3372 format %{ "pxor $dst,$dst\t! replicate4B zero" %} 3373 ins_encode %{ 3374 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3375 %} 3376 ins_pipe( fpu_reg_reg ); 3377 %} 3378 3379 instruct Repl8B_zero(vecD dst, immI0 zero) %{ 3380 predicate(n->as_Vector()->length() == 8); 3381 match(Set dst (ReplicateB zero)); 3382 format %{ "pxor $dst,$dst\t! replicate8B zero" %} 3383 ins_encode %{ 3384 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3385 %} 3386 ins_pipe( fpu_reg_reg ); 3387 %} 3388 3389 instruct Repl16B_zero(vecX dst, immI0 zero) %{ 3390 predicate(n->as_Vector()->length() == 16); 3391 match(Set dst (ReplicateB zero)); 3392 format %{ "pxor $dst,$dst\t! replicate16B zero" %} 3393 ins_encode %{ 3394 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3395 %} 3396 ins_pipe( fpu_reg_reg ); 3397 %} 3398 3399 instruct Repl32B_zero(vecY dst, immI0 zero) %{ 3400 predicate(n->as_Vector()->length() == 32); 3401 match(Set dst (ReplicateB zero)); 3402 format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %} 3403 ins_encode %{ 3404 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 3405 int vector_len = 1; 3406 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3407 %} 3408 ins_pipe( fpu_reg_reg ); 3409 %} 3410 3411 // Replicate char/short (2 byte) scalar to be vector 3412 instruct Repl2S(vecS dst, rRegI src) %{ 3413 predicate(n->as_Vector()->length() == 2); 3414 match(Set dst (ReplicateS src)); 3415 format %{ "movd $dst,$src\n\t" 3416 "pshuflw $dst,$dst,0x00\t! replicate2S" %} 3417 ins_encode %{ 3418 __ movdl($dst$$XMMRegister, $src$$Register); 3419 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3420 %} 3421 ins_pipe( fpu_reg_reg ); 3422 %} 3423 3424 // Replicate char/short (2 byte) scalar immediate to be vector by loading from const table. 3425 instruct Repl2S_imm(vecS dst, immI con) %{ 3426 predicate(n->as_Vector()->length() == 2); 3427 match(Set dst (ReplicateS con)); 3428 format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %} 3429 ins_encode %{ 3430 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2))); 3431 %} 3432 ins_pipe( fpu_reg_reg ); 3433 %} 3434 3435 instruct Repl4S_imm(vecD dst, immI con) %{ 3436 predicate(n->as_Vector()->length() == 4); 3437 match(Set dst (ReplicateS con)); 3438 format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %} 3439 ins_encode %{ 3440 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3441 %} 3442 ins_pipe( fpu_reg_reg ); 3443 %} 3444 3445 // Replicate char/short (2 byte) scalar zero to be vector 3446 instruct Repl2S_zero(vecS dst, immI0 zero) %{ 3447 predicate(n->as_Vector()->length() == 2); 3448 match(Set dst (ReplicateS zero)); 3449 format %{ "pxor $dst,$dst\t! 
replicate2S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate4S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8S_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate8S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16S_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar to be vector
instruct Repl2I(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Integer could be loaded into xmm register directly from memory.
instruct Repl2I_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
instruct Repl2I_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar zero to be vector
instruct Repl2I_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate2I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate4I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8I_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}
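// Note: in the ins_encode blocks throughout this file, vector_len selects the
// encoded AVX/EVEX operand width: 0 = 128-bit (XMM), 1 = 256-bit (YMM),
// 2 = 512-bit (ZMM), matching the vecX/vecY/vecZ operand classes.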
// Replicate long (8 byte) scalar to be vector
#ifdef _LP64
instruct Repl2L(vecX dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64

// Replicate long (8 byte) scalar immediate to be vector by loading from const table.
instruct Repl2L_imm(vecX dst, immL con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate long (8 byte) scalar zero to be vector
instruct Repl2L_zero(vecX dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL zero));
  format %{ "pxor $dst,$dst\t! replicate2L zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4L_zero(vecY dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate float (4 byte) scalar to be vector
instruct Repl2F(vecD dst, regF src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\t! replicate2F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F(vecX dst, regF src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\t!
replicate4F" %} 3641 ins_encode %{ 3642 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3643 %} 3644 ins_pipe( pipe_slow ); 3645 %} 3646 3647 // Replicate double (8 bytes) scalar to be vector 3648 instruct Repl2D(vecX dst, regD src) %{ 3649 predicate(n->as_Vector()->length() == 2); 3650 match(Set dst (ReplicateD src)); 3651 format %{ "pshufd $dst,$src,0x44\t! replicate2D" %} 3652 ins_encode %{ 3653 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3654 %} 3655 ins_pipe( pipe_slow ); 3656 %} 3657 3658 // ====================EVEX REPLICATE============================================= 3659 3660 instruct Repl4B_mem_evex(vecS dst, memory mem) %{ 3661 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw()); 3662 match(Set dst (ReplicateB (LoadB mem))); 3663 format %{ "vpbroadcastb $dst,$mem\t! replicate4B" %} 3664 ins_encode %{ 3665 int vector_len = 0; 3666 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 3667 %} 3668 ins_pipe( pipe_slow ); 3669 %} 3670 3671 instruct Repl8B_mem_evex(vecD dst, memory mem) %{ 3672 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 3673 match(Set dst (ReplicateB (LoadB mem))); 3674 format %{ "vpbroadcastb $dst,$mem\t! replicate8B" %} 3675 ins_encode %{ 3676 int vector_len = 0; 3677 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 3678 %} 3679 ins_pipe( pipe_slow ); 3680 %} 3681 3682 instruct Repl16B_evex(vecX dst, rRegI src) %{ 3683 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 3684 match(Set dst (ReplicateB src)); 3685 format %{ "vpbroadcastb $dst,$src\t! replicate16B" %} 3686 ins_encode %{ 3687 int vector_len = 0; 3688 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 3689 %} 3690 ins_pipe( pipe_slow ); 3691 %} 3692 3693 instruct Repl16B_mem_evex(vecX dst, memory mem) %{ 3694 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 3695 match(Set dst (ReplicateB (LoadB mem))); 3696 format %{ "vpbroadcastb $dst,$mem\t! replicate16B" %} 3697 ins_encode %{ 3698 int vector_len = 0; 3699 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 3700 %} 3701 ins_pipe( pipe_slow ); 3702 %} 3703 3704 instruct Repl32B_evex(vecY dst, rRegI src) %{ 3705 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); 3706 match(Set dst (ReplicateB src)); 3707 format %{ "vpbroadcastb $dst,$src\t! replicate32B" %} 3708 ins_encode %{ 3709 int vector_len = 1; 3710 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 3711 %} 3712 ins_pipe( pipe_slow ); 3713 %} 3714 3715 instruct Repl32B_mem_evex(vecY dst, memory mem) %{ 3716 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); 3717 match(Set dst (ReplicateB (LoadB mem))); 3718 format %{ "vpbroadcastb $dst,$mem\t! replicate32B" %} 3719 ins_encode %{ 3720 int vector_len = 1; 3721 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 3722 %} 3723 ins_pipe( pipe_slow ); 3724 %} 3725 3726 instruct Repl64B_evex(vecZ dst, rRegI src) %{ 3727 predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw()); 3728 match(Set dst (ReplicateB src)); 3729 format %{ "vpbroadcastb $dst,$src\t! 
upper replicate64B" %} 3730 ins_encode %{ 3731 int vector_len = 2; 3732 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 3733 %} 3734 ins_pipe( pipe_slow ); 3735 %} 3736 3737 instruct Repl64B_mem_evex(vecZ dst, memory mem) %{ 3738 predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw()); 3739 match(Set dst (ReplicateB (LoadB mem))); 3740 format %{ "vpbroadcastb $dst,$mem\t! replicate64B" %} 3741 ins_encode %{ 3742 int vector_len = 2; 3743 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 3744 %} 3745 ins_pipe( pipe_slow ); 3746 %} 3747 3748 instruct Repl16B_imm_evex(vecX dst, immI con) %{ 3749 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 3750 match(Set dst (ReplicateB con)); 3751 format %{ "movq $dst,[$constantaddress]\n\t" 3752 "vpbroadcastb $dst,$dst\t! replicate16B" %} 3753 ins_encode %{ 3754 int vector_len = 0; 3755 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3756 __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3757 %} 3758 ins_pipe( pipe_slow ); 3759 %} 3760 3761 instruct Repl32B_imm_evex(vecY dst, immI con) %{ 3762 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); 3763 match(Set dst (ReplicateB con)); 3764 format %{ "movq $dst,[$constantaddress]\n\t" 3765 "vpbroadcastb $dst,$dst\t! replicate32B" %} 3766 ins_encode %{ 3767 int vector_len = 1; 3768 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3769 __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3770 %} 3771 ins_pipe( pipe_slow ); 3772 %} 3773 3774 instruct Repl64B_imm_evex(vecZ dst, immI con) %{ 3775 predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw()); 3776 match(Set dst (ReplicateB con)); 3777 format %{ "movq $dst,[$constantaddress]\n\t" 3778 "vpbroadcastb $dst,$dst\t! upper replicate64B" %} 3779 ins_encode %{ 3780 int vector_len = 2; 3781 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3782 __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3783 %} 3784 ins_pipe( pipe_slow ); 3785 %} 3786 3787 instruct Repl64B_zero_evex(vecZ dst, immI0 zero) %{ 3788 predicate(n->as_Vector()->length() == 64 && UseAVX > 2); 3789 match(Set dst (ReplicateB zero)); 3790 format %{ "vpxor $dst k0,$dst,$dst\t! replicate64B zero" %} 3791 ins_encode %{ 3792 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 3793 int vector_len = 2; 3794 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3795 %} 3796 ins_pipe( fpu_reg_reg ); 3797 %} 3798 3799 instruct Repl4S_evex(vecD dst, rRegI src) %{ 3800 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw()); 3801 match(Set dst (ReplicateS src)); 3802 format %{ "vpbroadcastw $dst,$src\t! replicate4S" %} 3803 ins_encode %{ 3804 int vector_len = 0; 3805 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 3806 %} 3807 ins_pipe( pipe_slow ); 3808 %} 3809 3810 instruct Repl4S_mem_evex(vecD dst, memory mem) %{ 3811 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw()); 3812 match(Set dst (ReplicateS (LoadS mem))); 3813 format %{ "vpbroadcastw $dst,$mem\t! 
replicate4S" %} 3814 ins_encode %{ 3815 int vector_len = 0; 3816 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 3817 %} 3818 ins_pipe( pipe_slow ); 3819 %} 3820 3821 instruct Repl8S_evex(vecX dst, rRegI src) %{ 3822 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 3823 match(Set dst (ReplicateS src)); 3824 format %{ "vpbroadcastw $dst,$src\t! replicate8S" %} 3825 ins_encode %{ 3826 int vector_len = 0; 3827 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 3828 %} 3829 ins_pipe( pipe_slow ); 3830 %} 3831 3832 instruct Repl8S_mem_evex(vecX dst, memory mem) %{ 3833 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 3834 match(Set dst (ReplicateS (LoadS mem))); 3835 format %{ "vpbroadcastw $dst,$mem\t! replicate8S" %} 3836 ins_encode %{ 3837 int vector_len = 0; 3838 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 3839 %} 3840 ins_pipe( pipe_slow ); 3841 %} 3842 3843 instruct Repl16S_evex(vecY dst, rRegI src) %{ 3844 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 3845 match(Set dst (ReplicateS src)); 3846 format %{ "vpbroadcastw $dst,$src\t! replicate16S" %} 3847 ins_encode %{ 3848 int vector_len = 1; 3849 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 3850 %} 3851 ins_pipe( pipe_slow ); 3852 %} 3853 3854 instruct Repl16S_mem_evex(vecY dst, memory mem) %{ 3855 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 3856 match(Set dst (ReplicateS (LoadS mem))); 3857 format %{ "vpbroadcastw $dst,$mem\t! replicate16S" %} 3858 ins_encode %{ 3859 int vector_len = 1; 3860 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 3861 %} 3862 ins_pipe( pipe_slow ); 3863 %} 3864 3865 instruct Repl32S_evex(vecZ dst, rRegI src) %{ 3866 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw()); 3867 match(Set dst (ReplicateS src)); 3868 format %{ "vpbroadcastw $dst,$src\t! replicate32S" %} 3869 ins_encode %{ 3870 int vector_len = 2; 3871 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 3872 %} 3873 ins_pipe( pipe_slow ); 3874 %} 3875 3876 instruct Repl32S_mem_evex(vecZ dst, memory mem) %{ 3877 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw()); 3878 match(Set dst (ReplicateS (LoadS mem))); 3879 format %{ "vpbroadcastw $dst,$mem\t! replicate32S" %} 3880 ins_encode %{ 3881 int vector_len = 2; 3882 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 3883 %} 3884 ins_pipe( pipe_slow ); 3885 %} 3886 3887 instruct Repl8S_imm_evex(vecX dst, immI con) %{ 3888 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 3889 match(Set dst (ReplicateS con)); 3890 format %{ "movq $dst,[$constantaddress]\n\t" 3891 "vpbroadcastw $dst,$dst\t! replicate8S" %} 3892 ins_encode %{ 3893 int vector_len = 0; 3894 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3895 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3896 %} 3897 ins_pipe( pipe_slow ); 3898 %} 3899 3900 instruct Repl16S_imm_evex(vecY dst, immI con) %{ 3901 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 3902 match(Set dst (ReplicateS con)); 3903 format %{ "movq $dst,[$constantaddress]\n\t" 3904 "vpbroadcastw $dst,$dst\t! 
replicate16S" %} 3905 ins_encode %{ 3906 int vector_len = 1; 3907 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3908 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3909 %} 3910 ins_pipe( pipe_slow ); 3911 %} 3912 3913 instruct Repl32S_imm_evex(vecZ dst, immI con) %{ 3914 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw()); 3915 match(Set dst (ReplicateS con)); 3916 format %{ "movq $dst,[$constantaddress]\n\t" 3917 "vpbroadcastw $dst,$dst\t! replicate32S" %} 3918 ins_encode %{ 3919 int vector_len = 2; 3920 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3921 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3922 %} 3923 ins_pipe( pipe_slow ); 3924 %} 3925 3926 instruct Repl32S_zero_evex(vecZ dst, immI0 zero) %{ 3927 predicate(n->as_Vector()->length() == 32 && UseAVX > 2); 3928 match(Set dst (ReplicateS zero)); 3929 format %{ "vpxor $dst k0,$dst,$dst\t! replicate32S zero" %} 3930 ins_encode %{ 3931 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 3932 int vector_len = 2; 3933 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3934 %} 3935 ins_pipe( fpu_reg_reg ); 3936 %} 3937 3938 instruct Repl4I_evex(vecX dst, rRegI src) %{ 3939 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 3940 match(Set dst (ReplicateI src)); 3941 format %{ "vpbroadcastd $dst,$src\t! replicate4I" %} 3942 ins_encode %{ 3943 int vector_len = 0; 3944 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 3945 %} 3946 ins_pipe( pipe_slow ); 3947 %} 3948 3949 instruct Repl4I_mem_evex(vecX dst, memory mem) %{ 3950 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 3951 match(Set dst (ReplicateI (LoadI mem))); 3952 format %{ "vpbroadcastd $dst,$mem\t! replicate4I" %} 3953 ins_encode %{ 3954 int vector_len = 0; 3955 __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 3956 %} 3957 ins_pipe( pipe_slow ); 3958 %} 3959 3960 instruct Repl8I_evex(vecY dst, rRegI src) %{ 3961 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 3962 match(Set dst (ReplicateI src)); 3963 format %{ "vpbroadcastd $dst,$src\t! replicate8I" %} 3964 ins_encode %{ 3965 int vector_len = 1; 3966 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 3967 %} 3968 ins_pipe( pipe_slow ); 3969 %} 3970 3971 instruct Repl8I_mem_evex(vecY dst, memory mem) %{ 3972 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 3973 match(Set dst (ReplicateI (LoadI mem))); 3974 format %{ "vpbroadcastd $dst,$mem\t! replicate8I" %} 3975 ins_encode %{ 3976 int vector_len = 1; 3977 __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 3978 %} 3979 ins_pipe( pipe_slow ); 3980 %} 3981 3982 instruct Repl16I_evex(vecZ dst, rRegI src) %{ 3983 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 3984 match(Set dst (ReplicateI src)); 3985 format %{ "vpbroadcastd $dst,$src\t! replicate16I" %} 3986 ins_encode %{ 3987 int vector_len = 2; 3988 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 3989 %} 3990 ins_pipe( pipe_slow ); 3991 %} 3992 3993 instruct Repl16I_mem_evex(vecZ dst, memory mem) %{ 3994 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 3995 match(Set dst (ReplicateI (LoadI mem))); 3996 format %{ "vpbroadcastd $dst,$mem\t! 
replicate16I" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I_imm_evex(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t"
            "vpbroadcastd $dst,$dst\t! replicate4I" %}
  ins_encode %{
    int vector_len = 0;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_imm_evex(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
            "vpbroadcastd $dst,$dst\t! replicate8I" %}
  ins_encode %{
    int vector_len = 1;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_imm_evex(vecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate16I($con)\n\t"
            "vpbroadcastd $dst,$dst\t! replicate16I" %}
  ins_encode %{
    int vector_len = 2;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_zero_evex(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateI zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate16I zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate long (8 byte) scalar to be vector
#ifdef _LP64
instruct Repl4L_evex(vecY dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL src));
  format %{ "vpbroadcastq $dst,$src\t! replicate4L" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_evex(vecZ dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL src));
  format %{ "vpbroadcastq $dst,$src\t! replicate8L" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
instruct Repl4L_evex(vecY dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "vpbroadcastq $dst,$dst\t! replicate4L" %}
  ins_encode %{
    int vector_len = 1;
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
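// The !_LP64 variants above and below cannot broadcast a long from a single
// GPR: a long lives in a register pair, so the two 32-bit halves are first
// assembled in the XMM register (movdl of the low half, movdl of the high
// half via HIGH_FROM_LOW, punpckldq) and only then broadcast with
// evpbroadcastq.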
instruct Repl8L_evex(vecZ dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "vpbroadcastq $dst,$dst\t! replicate8L" %}
  ins_encode %{
    int vector_len = 2;
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64

instruct Repl4L_imm_evex(vecY dst, immL con) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastq $dst,$dst\t! replicate4L" %}
  ins_encode %{
    int vector_len = 1;
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_imm_evex(vecZ dst, immL con) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastq $dst,$dst\t! replicate8L" %}
  ins_encode %{
    int vector_len = 2;
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl2L_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "vpbroadcastq $dst,$mem\t! replicate2L" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4L_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "vpbroadcastq $dst,$mem\t! replicate4L" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "vpbroadcastq $dst,$mem\t! replicate8L" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_zero_evex(vecZ dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate8L zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}
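// In the EVEX format strings, "k0" names the default AVX-512 mask register;
// k0 cannot be selected as a real writemask, so quoting it documents that the
// operation is unmasked (all lanes are written).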
instruct Repl8F_evex(vecY dst, regF src) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF src));
  format %{ "vbroadcastss $dst,$src\t! replicate8F" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8F_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "vbroadcastss $dst,$mem\t! replicate8F" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16F_evex(vecZ dst, regF src) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateF src));
  format %{ "vbroadcastss $dst,$src\t! replicate16F" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16F_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "vbroadcastss $dst,$mem\t! replicate16F" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl2F_zero_evex(vecD dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate2F zero" %}
  ins_encode %{
    // Use vpxor in place of vxorps since EVEX has a constraint on dq for vxorps: this is a 512-bit operation
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F_zero_evex(vecX dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate4F zero" %}
  ins_encode %{
    // Use vpxor in place of vxorps since EVEX has a constraint on dq for vxorps: this is a 512-bit operation
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8F_zero_evex(vecY dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate8F zero" %}
  ins_encode %{
    // Use vpxor in place of vxorps since EVEX has a constraint on dq for vxorps: this is a 512-bit operation
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16F_zero_evex(vecZ dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate16F zero" %}
  ins_encode %{
    // Use vpxor in place of vxorps since EVEX has a constraint on dq for vxorps: this is a 512-bit operation
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}
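// The "constraint on dq" mentioned in the comments above refers to the EVEX
// encoding rule that 512-bit vxorps/vxorpd require AVX512DQ, while vpxor
// (encoded as vpxord at 512 bits) needs only AVX512F; xor-zeroing the full
// 512-bit register is safe for the narrower operands since the upper lanes
// become zero as well.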
instruct Repl4D_evex(vecY dst, regD src) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD src));
  format %{ "vbroadcastsd $dst,$src\t! replicate4D" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4D_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "vbroadcastsd $dst,$mem\t! replicate4D" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8D_evex(vecZ dst, regD src) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateD src));
  format %{ "vbroadcastsd $dst,$src\t! replicate8D" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8D_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "vbroadcastsd $dst,$mem\t! replicate8D" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl2D_zero_evex(vecX dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX > 2);
  match(Set dst (ReplicateD zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate2D zero" %}
  ins_encode %{
    // Use vpxor in place of vxorpd since EVEX has a constraint on dq for vxorpd: this is a 512-bit operation
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4D_zero_evex(vecY dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2);
  match(Set dst (ReplicateD zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate4D zero" %}
  ins_encode %{
    // Use vpxor in place of vxorpd since EVEX has a constraint on dq for vxorpd: this is a 512-bit operation
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8D_zero_evex(vecZ dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateD zero));
  format %{ "vpxor $dst k0,$dst,$dst,vect512\t! replicate8D zero" %}
  ins_encode %{
    // Use vpxor in place of vxorpd since EVEX has a constraint on dq for vxorpd: this is a 512-bit operation
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// ====================REDUCTION ARITHMETIC=======================================
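// The integer reductions below all follow the same ladder: repeatedly fold
// the upper half of the vector into the lower half (vextracti64x4_high /
// vextracti128_high for the 512/256-bit halves, then pshufd 0xE to swap
// quadwords and pshufd 0x1 to swap dwords), and finally add the scalar input
// src1 to the last remaining lane before moving the result back to a GPR.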
instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseSSE > 2 && UseAVX == 0);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp2, TEMP tmp);
  format %{ "movdqu $tmp2,$src2\n\t"
            "phaddd $tmp2,$tmp2\n\t"
            "movd $tmp,$src1\n\t"
            "paddd $tmp,$tmp2\n\t"
            "movd $dst,$tmp\t! add reduction2I" %}
  ins_encode %{
    __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(VM_Version::supports_avxonly());
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vphaddd $tmp,$src2,$src2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! add reduction2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0x1\n\t"
            "vpaddd $tmp,$src2,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! add reduction2I" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseSSE > 2 && UseAVX == 0);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "movdqu $tmp,$src2\n\t"
            "phaddd $tmp,$tmp\n\t"
            "phaddd $tmp,$tmp\n\t"
            "movd $tmp2,$src1\n\t"
            "paddd $tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t!
instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseSSE > 2 && UseAVX == 0);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp2, TEMP tmp);
  format %{ "movdqu  $tmp2,$src2\n\t"
            "phaddd  $tmp2,$tmp2\n\t"
            "movd    $tmp,$src1\n\t"
            "paddd   $tmp,$tmp2\n\t"
            "movd    $dst,$tmp\t! add reduction2I" %}
  ins_encode %{
    __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(VM_Version::supports_avxonly());
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vphaddd  $tmp,$src2,$src2\n\t"
            "movd     $tmp2,$src1\n\t"
            "vpaddd   $tmp2,$tmp2,$tmp\n\t"
            "movd     $dst,$tmp2\t! add reduction2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd  $tmp2,$src2,0x1\n\t"
            "vpaddd  $tmp,$src2,$tmp2\n\t"
            "movd    $tmp2,$src1\n\t"
            "vpaddd  $tmp2,$tmp,$tmp2\n\t"
            "movd    $dst,$tmp2\t! add reduction2I" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseSSE > 2 && UseAVX == 0);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "movdqu  $tmp,$src2\n\t"
            "phaddd  $tmp,$tmp\n\t"
            "phaddd  $tmp,$tmp\n\t"
            "movd    $tmp2,$src1\n\t"
            "paddd   $tmp2,$tmp\n\t"
            "movd    $dst,$tmp2\t! add reduction4I" %}
  ins_encode %{
    __ movdqu($tmp$$XMMRegister, $src2$$XMMRegister);
    __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister);
    __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ paddd($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(VM_Version::supports_avxonly());
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vphaddd  $tmp,$src2,$src2\n\t"
            "vphaddd  $tmp,$tmp,$tmp\n\t"
            "movd     $tmp2,$src1\n\t"
            "vpaddd   $tmp2,$tmp2,$tmp\n\t"
            "movd     $dst,$tmp2\t! add reduction4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd  $tmp2,$src2,0xE\n\t"
            "vpaddd  $tmp,$src2,$tmp2\n\t"
            "pshufd  $tmp2,$tmp,0x1\n\t"
            "vpaddd  $tmp,$tmp,$tmp2\n\t"
            "movd    $tmp2,$src1\n\t"
            "vpaddd  $tmp2,$tmp,$tmp2\n\t"
            "movd    $dst,$tmp2\t! add reduction4I" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
  predicate(VM_Version::supports_avxonly());
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vphaddd  $tmp,$src2,$src2\n\t"
            "vphaddd  $tmp,$tmp,$tmp2\n\t"
            "vextracti128_high  $tmp2,$tmp\n\t"
            "vpaddd  $tmp,$tmp,$tmp2\n\t"
            "movd    $tmp2,$src1\n\t"
            "vpaddd  $tmp2,$tmp2,$tmp\n\t"
            "movd    $dst,$tmp2\t! add reduction8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ vextracti128_high($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti128_high  $tmp,$src2\n\t"
            "vpaddd  $tmp,$tmp,$src2\n\t"
            "pshufd  $tmp2,$tmp,0xE\n\t"
            "vpaddd  $tmp,$tmp,$tmp2\n\t"
            "pshufd  $tmp2,$tmp,0x1\n\t"
            "vpaddd  $tmp,$tmp,$tmp2\n\t"
            "movd    $tmp2,$src1\n\t"
            "vpaddd  $tmp2,$tmp,$tmp2\n\t"
            "movd    $dst,$tmp2\t! add reduction8I" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vextracti64x4_high  $tmp3,$src2\n\t"
            "vpaddd  $tmp3,$tmp3,$src2\n\t"
            "vextracti128_high  $tmp,$tmp3\n\t"
            "vpaddd  $tmp,$tmp,$tmp3\n\t"
            "pshufd  $tmp2,$tmp,0xE\n\t"
            "vpaddd  $tmp,$tmp,$tmp2\n\t"
            "pshufd  $tmp2,$tmp,0x1\n\t"
            "vpaddd  $tmp,$tmp,$tmp2\n\t"
            "movd    $tmp2,$src1\n\t"
            "vpaddd  $tmp2,$tmp,$tmp2\n\t"
            "movd    $dst,$tmp2\t! add reduction16I" %}
  ins_encode %{
    __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister);
    __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd  $tmp2,$src2,0xE\n\t"
            "vpaddq  $tmp,$src2,$tmp2\n\t"
            "movdq   $tmp2,$src1\n\t"
            "vpaddq  $tmp2,$tmp,$tmp2\n\t"
            "movdq   $dst,$tmp2\t! add reduction2L" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpaddq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdq($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti128_high  $tmp,$src2\n\t"
            "vpaddq  $tmp2,$tmp,$src2\n\t"
            "pshufd  $tmp,$tmp2,0xE\n\t"
            "vpaddq  $tmp2,$tmp2,$tmp\n\t"
            "movdq   $tmp,$src1\n\t"
            "vpaddq  $tmp2,$tmp2,$tmp\n\t"
            "movdq   $dst,$tmp2\t! add reduction4L" %}
  ins_encode %{
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti64x4_high  $tmp2,$src2\n\t"
            "vpaddq  $tmp2,$tmp2,$src2\n\t"
            "vextracti128_high  $tmp,$tmp2\n\t"
            "vpaddq  $tmp2,$tmp2,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0xE\n\t"
            "vpaddq  $tmp2,$tmp2,$tmp\n\t"
            "movdq   $tmp,$src1\n\t"
            "vpaddq  $tmp2,$tmp2,$tmp\n\t"
            "movdq   $dst,$tmp2\t! add reduction8L" %}
  ins_encode %{
    __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif
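// Floating-point add reductions.  Unlike the integer flavors above, these
// must not reassociate: Java requires strict left-to-right summation, so
// the lanes are accumulated one addss/addsd (or vaddss/vaddsd) at a time,
// in lane order, into $dst, which doubles as the incoming scalar
// accumulator.
//
// A scalar sketch of the required evaluation order (illustrative C++ only;
// the helper name is assumed):
//
//   float add_reduction_vf(float dst, const float* src2, int lanes) {
//     for (int i = 0; i < lanes; i++) {
//       dst += src2[i];   // strictly in order, one lane per addss
//     }
//     return dst;
//   }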
instruct rsadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (AddReductionVF dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "addss   $dst,$src2\n\t"
            "pshufd  $tmp,$src2,0x01\n\t"
            "addss   $dst,$tmp\t! add reduction2F" %}
  ins_encode %{
    __ addss($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVF dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "vaddss  $dst,$dst,$src2\n\t"
            "pshufd  $tmp,$src2,0x01\n\t"
            "vaddss  $dst,$dst,$tmp\t! add reduction2F" %}
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (AddReductionVF dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "addss   $dst,$src2\n\t"
            "pshufd  $tmp,$src2,0x01\n\t"
            "addss   $dst,$tmp\n\t"
            "pshufd  $tmp,$src2,0x02\n\t"
            "addss   $dst,$tmp\n\t"
            "pshufd  $tmp,$src2,0x03\n\t"
            "addss   $dst,$tmp\t! add reduction4F" %}
  ins_encode %{
    __ addss($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVF dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vaddss  $dst,$dst,$src2\n\t"
            "pshufd  $tmp,$src2,0x01\n\t"
            "vaddss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$src2,0x02\n\t"
            "vaddss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$src2,0x03\n\t"
            "vaddss  $dst,$dst,$tmp\t! add reduction4F" %}
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct radd8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVF dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vaddss  $dst,$dst,$src2\n\t"
            "pshufd  $tmp,$src2,0x01\n\t"
            "vaddss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$src2,0x02\n\t"
            "vaddss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$src2,0x03\n\t"
            "vaddss  $dst,$dst,$tmp\n\t"
            "vextractf128_high  $tmp2,$src2\n\t"
            "vaddss  $dst,$dst,$tmp2\n\t"
            "pshufd  $tmp,$tmp2,0x01\n\t"
            "vaddss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x02\n\t"
            "vaddss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x03\n\t"
            "vaddss  $dst,$dst,$tmp\t! add reduction8F" %}
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVF dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vaddss  $dst,$dst,$src2\n\t"
            "pshufd  $tmp,$src2,0x01\n\t"
            "vaddss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$src2,0x02\n\t"
            "vaddss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$src2,0x03\n\t"
            "vaddss  $dst,$dst,$tmp\n\t"
            "vextractf32x4  $tmp2,$src2,0x1\n\t"
            "vaddss  $dst,$dst,$tmp2\n\t"
            "pshufd  $tmp,$tmp2,0x01\n\t"
            "vaddss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x02\n\t"
            "vaddss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x03\n\t"
            "vaddss  $dst,$dst,$tmp\n\t"
            "vextractf32x4  $tmp2,$src2,0x2\n\t"
            "vaddss  $dst,$dst,$tmp2\n\t"
            "pshufd  $tmp,$tmp2,0x01\n\t"
            "vaddss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x02\n\t"
            "vaddss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x03\n\t"
            "vaddss  $dst,$dst,$tmp\n\t"
            "vextractf32x4  $tmp2,$src2,0x3\n\t"
            "vaddss  $dst,$dst,$tmp2\n\t"
            "pshufd  $tmp,$tmp2,0x01\n\t"
            "vaddss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x02\n\t"
            "vaddss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x03\n\t"
            "vaddss  $dst,$dst,$tmp\t! add reduction16F" %}
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (AddReductionVD dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "addsd   $dst,$src2\n\t"
            "pshufd  $tmp,$src2,0xE\n\t"
            "addsd   $dst,$tmp\t! add reduction2D" %}
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ addsd($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVD dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vaddsd  $dst,$dst,$src2\n\t"
            "pshufd  $tmp,$src2,0xE\n\t"
            "vaddsd  $dst,$dst,$tmp\t! add reduction2D" %}
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vaddsd  $dst,$dst,$src2\n\t"
            "pshufd  $tmp,$src2,0xE\n\t"
            "vaddsd  $dst,$dst,$tmp\n\t"
            "vextractf32x4  $tmp2,$src2,0x1\n\t"
            "vaddsd  $dst,$dst,$tmp2\n\t"
            "pshufd  $tmp,$tmp2,0xE\n\t"
            "vaddsd  $dst,$dst,$tmp\t! add reduction4D" %}
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vaddsd  $dst,$dst,$src2\n\t"
            "pshufd  $tmp,$src2,0xE\n\t"
            "vaddsd  $dst,$dst,$tmp\n\t"
            "vextractf32x4  $tmp2,$src2,0x1\n\t"
            "vaddsd  $dst,$dst,$tmp2\n\t"
            "pshufd  $tmp,$tmp2,0xE\n\t"
            "vaddsd  $dst,$dst,$tmp\n\t"
            "vextractf32x4  $tmp2,$src2,0x2\n\t"
            "vaddsd  $dst,$dst,$tmp2\n\t"
            "pshufd  $tmp,$tmp2,0xE\n\t"
            "vaddsd  $dst,$dst,$tmp\n\t"
            "vextractf32x4  $tmp2,$src2,0x3\n\t"
            "vaddsd  $dst,$dst,$tmp2\n\t"
            "pshufd  $tmp,$tmp2,0xE\n\t"
            "vaddsd  $dst,$dst,$tmp\t! add reduction8D" %}
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
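// Multiply reductions follow the same shuffle-and-combine pattern as the
// add reductions, with * in place of + (scalar shape: mul = src1; then
// mul *= src2[i] for each lane).  The element multiplies need extra ISA
// support, which the predicates below encode: pmulld/vpmulld on 32-bit
// lanes is SSE4.1+ (hence UseSSE > 3 on the scalar-SSE forms), and vpmullq
// on 64-bit lanes exists only with AVX512DQ (hence
// VM_Version::supports_avx512dq() on the long forms).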
instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseSSE > 3 && UseAVX == 0);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd  $tmp2,$src2,0x1\n\t"
            "pmulld  $tmp2,$src2\n\t"
            "movd    $tmp,$src1\n\t"
            "pmulld  $tmp2,$tmp\n\t"
            "movd    $dst,$tmp2\t! mul reduction2I" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd   $tmp2,$src2,0x1\n\t"
            "vpmulld  $tmp,$src2,$tmp2\n\t"
            "movd     $tmp2,$src1\n\t"
            "vpmulld  $tmp2,$tmp,$tmp2\n\t"
            "movd     $dst,$tmp2\t! mul reduction2I" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseSSE > 3 && UseAVX == 0);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd  $tmp2,$src2,0xE\n\t"
            "pmulld  $tmp2,$src2\n\t"
            "pshufd  $tmp,$tmp2,0x1\n\t"
            "pmulld  $tmp2,$tmp\n\t"
            "movd    $tmp,$src1\n\t"
            "pmulld  $tmp2,$tmp\n\t"
            "movd    $dst,$tmp2\t! mul reduction4I" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1);
    __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd   $tmp2,$src2,0xE\n\t"
            "vpmulld  $tmp,$src2,$tmp2\n\t"
            "pshufd   $tmp2,$tmp,0x1\n\t"
            "vpmulld  $tmp,$tmp,$tmp2\n\t"
            "movd     $tmp2,$src1\n\t"
            "vpmulld  $tmp2,$tmp,$tmp2\n\t"
            "movd     $dst,$tmp2\t! mul reduction4I" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti128_high  $tmp,$src2\n\t"
            "vpmulld  $tmp,$tmp,$src2\n\t"
            "pshufd   $tmp2,$tmp,0xE\n\t"
            "vpmulld  $tmp,$tmp,$tmp2\n\t"
            "pshufd   $tmp2,$tmp,0x1\n\t"
            "vpmulld  $tmp,$tmp,$tmp2\n\t"
            "movd     $tmp2,$src1\n\t"
            "vpmulld  $tmp2,$tmp,$tmp2\n\t"
            "movd     $dst,$tmp2\t! mul reduction8I" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vextracti64x4_high  $tmp3,$src2\n\t"
            "vpmulld  $tmp3,$tmp3,$src2\n\t"
            "vextracti128_high  $tmp,$tmp3\n\t"
            "vpmulld  $tmp,$tmp,$tmp3\n\t"
            "pshufd   $tmp2,$tmp,0xE\n\t"
            "vpmulld  $tmp,$tmp,$tmp2\n\t"
            "pshufd   $tmp2,$tmp,0x1\n\t"
            "vpmulld  $tmp,$tmp,$tmp2\n\t"
            "movd     $tmp2,$src1\n\t"
            "vpmulld  $tmp2,$tmp,$tmp2\n\t"
            "movd     $dst,$tmp2\t! mul reduction16I" %}
  ins_encode %{
    __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister);
    __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd   $tmp2,$src2,0xE\n\t"
            "vpmullq  $tmp,$src2,$tmp2\n\t"
            "movdq    $tmp2,$src1\n\t"
            "vpmullq  $tmp2,$tmp,$tmp2\n\t"
            "movdq    $dst,$tmp2\t! mul reduction2L" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpmullq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdq($tmp2$$XMMRegister, $src1$$Register);
    __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti128_high  $tmp,$src2\n\t"
            "vpmullq  $tmp2,$tmp,$src2\n\t"
            "pshufd   $tmp,$tmp2,0xE\n\t"
            "vpmullq  $tmp2,$tmp2,$tmp\n\t"
            "movdq    $tmp,$src1\n\t"
            "vpmullq  $tmp2,$tmp2,$tmp\n\t"
            "movdq    $dst,$tmp2\t! mul reduction4L" %}
  ins_encode %{
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti64x4_high  $tmp2,$src2\n\t"
            "vpmullq  $tmp2,$tmp2,$src2\n\t"
            "vextracti128_high  $tmp,$tmp2\n\t"
            "vpmullq  $tmp2,$tmp2,$tmp\n\t"
            "pshufd   $tmp,$tmp2,0xE\n\t"
            "vpmullq  $tmp2,$tmp2,$tmp\n\t"
            "movdq    $tmp,$src1\n\t"
            "vpmullq  $tmp2,$tmp2,$tmp\n\t"
            "movdq    $dst,$tmp2\t! mul reduction8L" %}
  ins_encode %{
    __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif
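// Floating-point multiply reductions.  As with the FP add reductions, Java
// semantics forbid reassociating the product, so the lanes are folded in
// strictly one mulss/mulsd (or vmulss/vmulsd) at a time, in lane order,
// with $dst carrying the running scalar product.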
instruct rsmul2F_reduction(regF dst, vecD src2, regF tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "mulss   $dst,$src2\n\t"
            "pshufd  $tmp,$src2,0x01\n\t"
            "mulss   $dst,$tmp\t! mul reduction2F" %}
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul2F_reduction_reg(regF dst, vecD src2, regF tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vmulss  $dst,$dst,$src2\n\t"
            "pshufd  $tmp,$src2,0x01\n\t"
            "vmulss  $dst,$dst,$tmp\t! mul reduction2F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "mulss   $dst,$src2\n\t"
            "pshufd  $tmp,$src2,0x01\n\t"
            "mulss   $dst,$tmp\n\t"
            "pshufd  $tmp,$src2,0x02\n\t"
            "mulss   $dst,$tmp\n\t"
            "pshufd  $tmp,$src2,0x03\n\t"
            "mulss   $dst,$tmp\t! mul reduction4F" %}
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vmulss  $dst,$dst,$src2\n\t"
            "pshufd  $tmp,$src2,0x01\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$src2,0x02\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$src2,0x03\n\t"
            "vmulss  $dst,$dst,$tmp\t! mul reduction4F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulss  $dst,$dst,$src2\n\t"
            "pshufd  $tmp,$src2,0x01\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$src2,0x02\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$src2,0x03\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "vextractf128_high  $tmp2,$src2\n\t"
            "vmulss  $dst,$dst,$tmp2\n\t"
            "pshufd  $tmp,$tmp2,0x01\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x02\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x03\n\t"
            "vmulss  $dst,$dst,$tmp\t! mul reduction8F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulss  $dst,$dst,$src2\n\t"
            "pshufd  $tmp,$src2,0x01\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$src2,0x02\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$src2,0x03\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "vextractf32x4  $tmp2,$src2,0x1\n\t"
            "vmulss  $dst,$dst,$tmp2\n\t"
            "pshufd  $tmp,$tmp2,0x01\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x02\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x03\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "vextractf32x4  $tmp2,$src2,0x2\n\t"
            "vmulss  $dst,$dst,$tmp2\n\t"
            "pshufd  $tmp,$tmp2,0x01\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x02\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x03\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "vextractf32x4  $tmp2,$src2,0x3\n\t"
            "vmulss  $dst,$dst,$tmp2\n\t"
            "pshufd  $tmp,$tmp2,0x01\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x02\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x03\n\t"
            "vmulss  $dst,$dst,$tmp\t! mul reduction16F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "mulsd   $dst,$src2\n\t"
            "pshufd  $tmp,$src2,0xE\n\t"
            "mulsd   $dst,$tmp\t! mul reduction2D" %}
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vmulsd  $dst,$dst,$src2\n\t"
            "pshufd  $tmp,$src2,0xE\n\t"
            "vmulsd  $dst,$dst,$tmp\t! mul reduction2D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulsd  $dst,$dst,$src2\n\t"
            "pshufd  $tmp,$src2,0xE\n\t"
            "vmulsd  $dst,$dst,$tmp\n\t"
            "vextractf128_high  $tmp2,$src2\n\t"
            "vmulsd  $dst,$dst,$tmp2\n\t"
            "pshufd  $tmp,$tmp2,0xE\n\t"
            "vmulsd  $dst,$dst,$tmp\t! mul reduction4D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulsd  $dst,$dst,$src2\n\t"
            "pshufd  $tmp,$src2,0xE\n\t"
            "vmulsd  $dst,$dst,$tmp\n\t"
            "vextractf32x4  $tmp2,$src2,0x1\n\t"
            "vmulsd  $dst,$dst,$tmp2\n\t"
            "pshufd  $tmp,$tmp2,0xE\n\t"
            "vmulsd  $dst,$dst,$tmp\n\t"
            "vextractf32x4  $tmp2,$src2,0x2\n\t"
            "vmulsd  $dst,$dst,$tmp2\n\t"
            "pshufd  $tmp,$tmp2,0xE\n\t"
            "vmulsd  $dst,$dst,$tmp\n\t"
            "vextractf32x4  $tmp2,$src2,0x3\n\t"
            "vmulsd  $dst,$dst,$tmp2\n\t"
            "pshufd  $tmp,$tmp2,0xE\n\t"
            "vmulsd  $dst,$dst,$tmp\t! mul reduction8D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// ====================VECTOR ARITHMETIC=======================================

// --------------------------------- ADD --------------------------------------
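// Each vector-add shape below comes in several flavors, selected by
// predicate: a two-operand SSE form (dst = dst + src), non-destructive
// three-operand forms for plain AVX (supports_avxonly()) and for AVX-512
// with the BW extension (supports_avx512bw(), required for byte/short
// lanes), plus an "_evex_special" fallback for AVX-512 chips without BW
// (supports_avx512nobw()) that ties $dst to the first input again.  The
// "_mem" forms fold the second input's LoadVector straight into the
// instruction as an address operand, saving a separate vector load.
//
// The vector_len argument passed to the assembler selects the encoded
// vector width: 0 = 128-bit (xmm), 1 = 256-bit (ymm), 2 = 512-bit (zmm).
// For example (illustrative only), the packed32B rules below emit
//   __ vpaddb(dst, src1, src2, /*vector_len=*/1);  // 256-bit ymm add
// while the packed64B rules pass 2 for a full 512-bit zmm add.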
// Bytes vector add
instruct vadd4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst src));
  format %{ "paddb   $dst,$src\t! add packed4B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb  $dst,$dst,$src2\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb  $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst src));
  format %{ "paddb   $dst,$src\t! add packed8B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb  $dst,$dst,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb  $dst,$src,$mem\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst src));
  format %{ "paddb   $dst,$src\t! add packed16B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb  $dst,$dst,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb  $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb  $dst,$dst,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb  $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
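// The short/char rules that follow mirror the byte rules exactly, with
// paddw/vpaddw on 16-bit lanes instead of paddb/vpaddb; only the lane
// counts (2/4/8/16/32) and vector_len values change.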
add packed2S" %} 5776 ins_encode %{ 5777 int vector_len = 0; 5778 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5779 %} 5780 ins_pipe( pipe_slow ); 5781 %} 5782 5783 instruct vadd2S_mem_avx(vecS dst, vecS src, memory mem) %{ 5784 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 5785 match(Set dst (AddVS src (LoadVector mem))); 5786 format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %} 5787 ins_encode %{ 5788 int vector_len = 0; 5789 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5790 %} 5791 ins_pipe( pipe_slow ); 5792 %} 5793 5794 instruct vadd2S_mem_evex(vecS dst, vecS src, memory mem) %{ 5795 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 5796 match(Set dst (AddVS src (LoadVector mem))); 5797 format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %} 5798 ins_encode %{ 5799 int vector_len = 0; 5800 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5801 %} 5802 ins_pipe( pipe_slow ); 5803 %} 5804 5805 instruct vadd2S_mem_evex_special(vecS dst, vecS src, memory mem) %{ 5806 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 5807 match(Set dst (AddVS dst (LoadVector mem))); 5808 effect(TEMP src); 5809 format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %} 5810 ins_encode %{ 5811 int vector_len = 0; 5812 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5813 %} 5814 ins_pipe( pipe_slow ); 5815 %} 5816 5817 instruct vadd4S(vecD dst, vecD src) %{ 5818 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 5819 match(Set dst (AddVS dst src)); 5820 format %{ "paddw $dst,$src\t! add packed4S" %} 5821 ins_encode %{ 5822 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5823 %} 5824 ins_pipe( pipe_slow ); 5825 %} 5826 5827 instruct vadd4S_reg_avx(vecD dst, vecD src1, vecD src2) %{ 5828 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 5829 match(Set dst (AddVS src1 src2)); 5830 format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %} 5831 ins_encode %{ 5832 int vector_len = 0; 5833 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5834 %} 5835 ins_pipe( pipe_slow ); 5836 %} 5837 5838 instruct vadd4S_reg_evex(vecD dst, vecD src1, vecD src2) %{ 5839 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 5840 match(Set dst (AddVS src1 src2)); 5841 format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %} 5842 ins_encode %{ 5843 int vector_len = 0; 5844 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5845 %} 5846 ins_pipe( pipe_slow ); 5847 %} 5848 5849 instruct vadd4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ 5850 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 5851 match(Set dst (AddVS dst src2)); 5852 effect(TEMP src1); 5853 format %{ "vpaddw $dst,$dst,$src2\t! add packed4S" %} 5854 ins_encode %{ 5855 int vector_len = 0; 5856 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5857 %} 5858 ins_pipe( pipe_slow ); 5859 %} 5860 5861 instruct vadd4S_mem_avx(vecD dst, vecD src, memory mem) %{ 5862 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 5863 match(Set dst (AddVS src (LoadVector mem))); 5864 format %{ "vpaddw $dst,$src,$mem\t! 

instruct vadd2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed4S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed8S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
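
// Note: the *_mem forms fold the (LoadVector mem) operand straight into the
// arithmetic instruction, so one source is read from memory and no separate
// load or extra vector register is needed.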

// Integers vector add
instruct vadd2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed2I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed4I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
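
// For illustration only (hypothetical Java source, not part of this file):
// the superword pass vectorizes a loop such as
//   for (int i = 0; i < n; i++) { a[i] += b[i]; }   // int[] a, b
// into AddVI nodes, which the rules above lower to paddd/vpaddd at 128-,
// 256- or 512-bit width depending on UseAVX and the platform.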

// Longs vector add
instruct vadd2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL dst src));
  format %{ "paddq $dst,$src\t! add packed2L" %}
  ins_encode %{
    __ paddq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector add
instruct vadd2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed2F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed4F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector add
instruct vadd2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD dst src));
  format %{ "addpd $dst,$src\t! add packed2D" %}
  ins_encode %{
    __ addpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
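
// Note: the packed integer adds above (paddb/paddw/paddd/paddq and their
// vpadd* forms) wrap modulo 2^width in each lane, e.g. bytes 0x7F + 0x01
// give 0x80 (-128); the saturating paddsb/paddusb variants are not used.
// The float/double forms (addps/addpd) follow IEEE-754 arithmetic instead.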

// --------------------------------- SUB --------------------------------------

// Bytes vector sub
instruct vsub4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed4B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed8B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed16B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector sub
instruct vsub2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed2S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst src2));
  effect(TEMP src1);
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed4S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst src2));
  effect(TEMP src1);
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed8S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst src2));
  effect(TEMP src1);
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS dst src2));
  effect(TEMP src1);
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector sub
instruct vsub2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed2I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed4I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector sub
instruct vsub2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL dst src));
  format %{ "psubq $dst,$src\t! sub packed2L" %}
  ins_encode %{
    __ psubq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector sub
instruct vsub2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed2F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed4F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector sub
instruct vsub2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVD dst src));
  format %{ "subpd $dst,$src\t! sub packed2D" %}
  ins_encode %{
    __ subpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vmul2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed2S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS dst src2));
  effect(TEMP src1);
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
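// The three register flavors above repeat for every short/char rule below:
// *_avx matches pre-EVEX hardware (supports_avxonly()), *_evex matches EVEX
// hardware with the AVX-512BW word instructions, and *_evex_special covers
// EVEX hardware without BW (supports_avx512nobw()). A hedged reading of the
// special form: by matching the two-operand shape (dst doubles as the first
// input) and pinning the spare operand with effect(TEMP ...), it keeps the
// operands in registers the VEX-encoded vpmullw can still address.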
instruct vmul4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed4S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS dst src2));
  effect(TEMP src1);
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed8S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vmul8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS dst src2));
  effect(TEMP src1);
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
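// From 16S upward the AVX flavor is guarded by supports_avx256only() rather
// than supports_avxonly(), since a 256-bit integer op needs AVX2. Assumed
// shape of these predicates (per vm_version_x86.hpp of this era):
//
//   supports_avxonly()     ~ (AVX or AVX2) && !EVEX
//   supports_avx256only()  ~ AVX2 && !EVEX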
instruct vmul16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS dst src2));
  effect(TEMP src1);
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector mul (sse4_1)
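// pmulld is an SSE4.1 instruction, hence the UseSSE > 3 guard below. Like
// pmullw it truncates, keeping the low 32 bits of each 64-bit lane product:
//
//   0x00010000 * 0x00010000 = 0x1_00000000  ->  lane result 0x00000000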
instruct vmul2I(vecD dst, vecD src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed2I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I(vecX dst, vecX src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed4I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
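// Long vector mul: vpmullq is an AVX-512DQ instruction, so every MulVL rule
// below also requires VM_Version::supports_avx512dq(); there is no packed
// 64x64->64 multiply to fall back on in SSE/AVX2.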
instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector mul
instruct vmul2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed2F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed4F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector mul
instruct vmul2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD dst src));
  format %{ "mulpd $dst,$src\t! mul packed2D" %}
  ins_encode %{
    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vcmov4D_reg(vecY dst, vecY src1, vecY src2, immI8 cop, cmpOp_vcmppd copnd) %{
  predicate(UseAVX > 0 && UseAVX < 3 && n->as_Vector()->length() == 4);
  match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2)));
  effect(TEMP dst, USE src1, USE src2);
  format %{ "cmppd.$copnd $dst, $src1, $src2 ! vcmovevd, cond=$cop\n\t"
            "vblendvpd $dst,$src1,$src2,$dst ! vcmovevd\n\t"
         %}
  ins_encode %{
    int vector_len = 1;
    int cond = (Assembler::Condition)($copnd$$cmpcode);
    __ cmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len);
    __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
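// The cmppd/blend pair above is a vector conditional move: cmppd writes an
// all-ones or all-zeros 64-bit mask per lane into dst, and the blend then
// picks the src2 lane wherever the mask's sign bit is set. Scalar sketch of
// one lane (illustrative only):
//
//   long mask = compare(src1_lane, src2_lane, cond) ? -1L : 0L;
//   dst_lane  = (mask < 0) ? src2_lane : src1_lane;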
// --------------------------------- DIV --------------------------------------

// Floats vector div
instruct vdiv2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed2F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed4F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector div
instruct vdiv2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD dst src));
  format %{ "divpd $dst,$src\t! div packed2D" %}
  ins_encode %{
    __ divpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ Shift ---------------------------------------

// Left and right shift count vectors are the same on x86
// (only lowest bits of xmm reg are used for count).
instruct vshiftcnt(vecS dst, rRegI cnt) %{
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "movd $dst,$cnt\t! load shift count" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $cnt$$Register);
  %}
  ins_pipe( pipe_slow );
%}
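// Usage sketch: the scalar count is loaded once via the rule above and then
// feeds any of the variable-count packed shifts, which read the count from
// the low 64 bits of the xmm register (register names are illustrative):
//
//   __ movdl(tmp_xmm, cnt_reg);   // count -> low 32 bits of tmp_xmm
//   __ psllw(dst_xmm, tmp_xmm);   // each 16-bit lane <<= count
//   __ psrld(dst2_xmm, tmp_xmm);  // each 32-bit lane >>>= count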
// --------------------------------- Sqrt --------------------------------------

// Floating point vector sqrt - double precision only
instruct vsqrt2D_reg(vecX dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt2D_mem(vecX dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4D_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4D_mem(vecY dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8D_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8D_mem(vecZ dst, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ LeftShift -----------------------------------

// Shorts/Chars vector left shift
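// Each lane size below comes in two forms: a variable-count form whose count
// sits in an xmm register (loaded by vshiftcnt above) and an immediate form
// taking an immI8 constant, which encodes the count directly, e.g.:
//
//   __ psllw(dst_xmm, 3);  // each 16-bit lane <<= 3 (dst_xmm is illustrative)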
instruct vsll2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_avx(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_evex(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsll4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_avx(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_evex(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsll8S(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_avx(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_evex(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsll16S_reg_avx(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_evex(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector left shift
instruct vsll2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
// Longs vector left shift
instruct vsll2L(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8L_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
// ----------------------- LogicalRightShift -----------------------------------

// Shorts vector logical right shift produces an incorrect Java result for
// negative data, because Java code converts a short value into an int with
// sign extension before the shift. Char vectors are fine, though, since
// chars are unsigned values.
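// A worked example (plain Java, illustrative only) of the divergence for
// shorts, assuming s holds the most negative short value:
//
//   short s = (short)0x8000;            // -32768
//   int scalar = s >>> 1;               // s widens to 0xFFFF8000 first,
//                                       // so scalar == 0x7FFFC000
//   short narrowed = (short)scalar;     // low 16 bits: 0xC000 (-16384)
//
// A 16-bit lane shift (psrlw) would compute 0x8000 >>> 1 == 0x4000
// (+16384) instead, which is why the URShiftVS rules below are only safe
// when the data is known to be char (zero-extended on widening).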
instruct vsrl2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_avx(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_evex(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
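// Note the pattern above, which repeats for every vector size below: the
// *_avx rules are selected by VM_Version::supports_avxonly() (AVX/AVX2
// hardware without AVX-512), the *_evex rules by supports_avx512bw()
// (AVX-512 with the byte/word extension), and the *_evex_special rules by
// supports_avx512nobw() (AVX-512 without BW), where the word-size shift
// has no EVEX form and the rule instead updates dst in place with src
// claimed only as a TEMP.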
instruct vsrl4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_avx(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_evex(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_avx(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_evex(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_avx(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_evex(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector logical right shift
instruct vsrl2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector logical right shift
instruct vsrl2L(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8L_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------- ArithmeticRightShift -----------------------------------

// Shorts/Chars vector arithmetic right shift
instruct vsra2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_avx(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_evex(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_avx(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_evex(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_avx(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_evex(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_avx(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_evex(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector arithmetic right shift
instruct vsra2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// There are no longs vector arithmetic right shift instructions: x86 has
// no packed 64-bit arithmetic right shift below AVX-512 (vpsraq), and no
// rule for RShiftVL is defined here.


// --------------------------------- AND --------------------------------------

instruct vand4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (4 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (8 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (16 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- OR ---------------------------------------

instruct vor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (4 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (8 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (16 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- XOR --------------------------------------

instruct vxor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}