//
// Copyright (c) 2011, 2016, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def" name ( register save type, C convention save type,
//                  ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers. 512-bit registers or 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre EVEX enabled architectures:
//   XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX enabled architectures:
//   XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
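//
// As a worked reading of the "reg_def" tuple described above (an
// explanatory gloss, not an extra definition): the first entry below,
//   reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
// declares slot XMM0 as save-on-call under both the VM and the C calling
// conventions, spilled as a Float (Op_RegF), with hardware encoding 0,
// backed by the first 32-bit lane of xmm0's VMReg; XMM0b-XMM0p then name
// the remaining lanes of the same register via next(1)..next(15).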
//
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM31 preserved across function calls
//              XMM0-XMM3 might hold parameters

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

#ifdef _WIN64

reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(15));

reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(15));

#else // _WIN64

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

#ifdef _LP64

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
xmm23->as_VMReg()->next(8)); 924 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9)); 925 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10)); 926 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11)); 927 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12)); 928 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13)); 929 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14)); 930 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15)); 931 932 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()); 933 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1)); 934 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2)); 935 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3)); 936 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4)); 937 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5)); 938 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6)); 939 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7)); 940 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8)); 941 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9)); 942 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10)); 943 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11)); 944 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12)); 945 reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13)); 946 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14)); 947 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15)); 948 949 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()); 950 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1)); 951 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2)); 952 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3)); 953 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4)); 954 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5)); 955 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6)); 956 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7)); 957 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8)); 958 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9)); 959 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10)); 960 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11)); 961 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12)); 962 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13)); 963 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14)); 964 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15)); 965 966 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()); 967 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1)); 968 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2)); 969 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3)); 970 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4)); 971 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5)); 972 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6)); 973 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7)); 974 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8)); 975 reg_def XMM26j( SOC, SOC, Op_RegF, 26, 
xmm26->as_VMReg()->next(9)); 976 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10)); 977 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11)); 978 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12)); 979 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13)); 980 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14)); 981 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15)); 982 983 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()); 984 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1)); 985 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2)); 986 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3)); 987 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4)); 988 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5)); 989 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6)); 990 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7)); 991 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8)); 992 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9)); 993 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10)); 994 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11)); 995 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12)); 996 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13)); 997 reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14)); 998 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15)); 999 1000 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()); 1001 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1)); 1002 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2)); 1003 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3)); 1004 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4)); 1005 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5)); 1006 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6)); 1007 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7)); 1008 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8)); 1009 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9)); 1010 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10)); 1011 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11)); 1012 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12)); 1013 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13)); 1014 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14)); 1015 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15)); 1016 1017 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()); 1018 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1)); 1019 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2)); 1020 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3)); 1021 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4)); 1022 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5)); 1023 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6)); 1024 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7)); 1025 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8)); 1026 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9)); 1027 reg_def XMM29k( SOC, SOC, Op_RegF, 
29, xmm29->as_VMReg()->next(10)); 1028 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11)); 1029 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12)); 1030 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13)); 1031 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14)); 1032 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15)); 1033 1034 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()); 1035 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1)); 1036 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2)); 1037 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3)); 1038 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4)); 1039 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5)); 1040 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6)); 1041 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7)); 1042 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8)); 1043 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9)); 1044 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10)); 1045 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11)); 1046 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12)); 1047 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13)); 1048 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14)); 1049 reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15)); 1050 1051 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()); 1052 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1)); 1053 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2)); 1054 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3)); 1055 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4)); 1056 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5)); 1057 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6)); 1058 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7)); 1059 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8)); 1060 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9)); 1061 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10)); 1062 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11)); 1063 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12)); 1064 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13)); 1065 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14)); 1066 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15)); 1067 1068 #endif // _LP64 1069 1070 #endif // _WIN64 1071 1072 #ifdef _LP64 1073 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad()); 1074 #else 1075 reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad()); 1076 #endif // _LP64 1077 1078 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1079 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1080 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1081 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1082 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, 
XMM4o, XMM4p, 1083 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1084 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1085 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1086 #ifdef _LP64 1087 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1088 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1089 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1090 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1091 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1092 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1093 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1094 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1095 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 1096 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 1097 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 1098 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 1099 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 1100 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 1101 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 1102 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 1103 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 1104 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 1105 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 1106 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 1107 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 1108 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 1109 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1110 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1111 #endif 1112 ); 1113 1114 // flags 
alloc_class chunk2(RFLAGS);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);

// Class for pre evex float registers
reg_class float_reg_legacy(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15
#endif
                           );

// Class for evex float registers
reg_class float_reg_evex(XMM0,
                         XMM1,
                         XMM2,
                         XMM3,
                         XMM4,
                         XMM5,
                         XMM6,
                         XMM7
#ifdef _LP64
                        ,XMM8,
                         XMM9,
                         XMM10,
                         XMM11,
                         XMM12,
                         XMM13,
                         XMM14,
                         XMM15,
                         XMM16,
                         XMM17,
                         XMM18,
                         XMM19,
                         XMM20,
                         XMM21,
                         XMM22,
                         XMM23,
                         XMM24,
                         XMM25,
                         XMM26,
                         XMM27,
                         XMM28,
                         XMM29,
                         XMM30,
                         XMM31
#endif
                         );

reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for pre evex double registers
reg_class double_reg_legacy(XMM0,  XMM0b,
                            XMM1,  XMM1b,
                            XMM2,  XMM2b,
                            XMM3,  XMM3b,
                            XMM4,  XMM4b,
                            XMM5,  XMM5b,
                            XMM6,  XMM6b,
                            XMM7,  XMM7b
#ifdef _LP64
                           ,XMM8,  XMM8b,
                            XMM9,  XMM9b,
                            XMM10, XMM10b,
                            XMM11, XMM11b,
                            XMM12, XMM12b,
                            XMM13, XMM13b,
                            XMM14, XMM14b,
                            XMM15, XMM15b
#endif
                            );

// Class for evex double registers
reg_class double_reg_evex(XMM0,  XMM0b,
                          XMM1,  XMM1b,
                          XMM2,  XMM2b,
                          XMM3,  XMM3b,
                          XMM4,  XMM4b,
                          XMM5,  XMM5b,
                          XMM6,  XMM6b,
                          XMM7,  XMM7b
#ifdef _LP64
                         ,XMM8,  XMM8b,
                          XMM9,  XMM9b,
                          XMM10, XMM10b,
                          XMM11, XMM11b,
                          XMM12, XMM12b,
                          XMM13, XMM13b,
                          XMM14, XMM14b,
                          XMM15, XMM15b,
                          XMM16, XMM16b,
                          XMM17, XMM17b,
                          XMM18, XMM18b,
                          XMM19, XMM19b,
                          XMM20, XMM20b,
                          XMM21, XMM21b,
                          XMM22, XMM22b,
                          XMM23, XMM23b,
                          XMM24, XMM24b,
                          XMM25, XMM25b,
                          XMM26, XMM26b,
                          XMM27, XMM27b,
                          XMM28, XMM28b,
                          XMM29, XMM29b,
                          XMM30, XMM30b,
                          XMM31, XMM31b
#endif
                          );

reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for pre evex 32bit vector registers
reg_class vectors_reg_legacy(XMM0,
                             XMM1,
                             XMM2,
                             XMM3,
                             XMM4,
                             XMM5,
                             XMM6,
                             XMM7
#ifdef _LP64
                            ,XMM8,
                             XMM9,
                             XMM10,
                             XMM11,
                             XMM12,
                             XMM13,
                             XMM14,
                             XMM15
#endif
                             );

// Class for evex 32bit vector registers
reg_class vectors_reg_evex(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15,
                           XMM16,
                           XMM17,
                           XMM18,
                           XMM19,
                           XMM20,
                           XMM21,
                           XMM22,
                           XMM23,
                           XMM24,
                           XMM25,
                           XMM26,
                           XMM27,
                           XMM28,
                           XMM29,
                           XMM30,
                           XMM31
#endif
                           );

reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for pre evex 64bit vector registers
reg_class vectord_reg_legacy(XMM0,  XMM0b,
                             XMM1,  XMM1b,
                             XMM2,  XMM2b,
                             XMM3,  XMM3b,
                             XMM4,  XMM4b,
                             XMM5,  XMM5b,
                             XMM6,  XMM6b,
                             XMM7,  XMM7b
#ifdef _LP64
                            ,XMM8,  XMM8b,
                             XMM9,  XMM9b,
                             XMM10, XMM10b,
                             XMM11, XMM11b,
                             XMM12, XMM12b,
                             XMM13, XMM13b,
                             XMM14, XMM14b,
                             XMM15, XMM15b
#endif
                             );

// Class for evex 64bit vector registers
reg_class vectord_reg_evex(XMM0,  XMM0b,
                           XMM1,  XMM1b,
                           XMM2,  XMM2b,
                           XMM3,  XMM3b,
                           XMM4,  XMM4b,
                           XMM5,  XMM5b,
                           XMM6,  XMM6b,
                           XMM7,  XMM7b
#ifdef _LP64
                          ,XMM8,  XMM8b,
                           XMM9,  XMM9b,
                           XMM10, XMM10b,
                           XMM11, XMM11b,
                           XMM12, XMM12b,
                           XMM13, XMM13b,
                           XMM14, XMM14b,
                           XMM15, XMM15b,
                           XMM16, XMM16b,
                           XMM17, XMM17b,
                           XMM18, XMM18b,
                           XMM19, XMM19b,
                           XMM20, XMM20b,
                           XMM21, XMM21b,
                           XMM22, XMM22b,
                           XMM23, XMM23b,
                           XMM24, XMM24b,
                           XMM25, XMM25b,
                           XMM26, XMM26b,
                           XMM27, XMM27b,
                           XMM28, XMM28b,
                           XMM29, XMM29b,
                           XMM30, XMM30b,
                           XMM31, XMM31b
#endif
                           );

reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for pre evex 128bit vector registers
reg_class vectorx_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,
                             XMM1,  XMM1b,  XMM1c,  XMM1d,
                             XMM2,  XMM2b,  XMM2c,  XMM2d,
                             XMM3,  XMM3b,  XMM3c,  XMM3d,
                             XMM4,  XMM4b,  XMM4c,  XMM4d,
                             XMM5,  XMM5b,  XMM5c,  XMM5d,
                             XMM6,  XMM6b,  XMM6c,  XMM6d,
                             XMM7,  XMM7b,  XMM7c,  XMM7d
#ifdef _LP64
                            ,XMM8,  XMM8b,  XMM8c,  XMM8d,
                             XMM9,  XMM9b,  XMM9c,  XMM9d,
                             XMM10, XMM10b, XMM10c, XMM10d,
                             XMM11, XMM11b, XMM11c, XMM11d,
                             XMM12, XMM12b, XMM12c, XMM12d,
                             XMM13, XMM13b, XMM13c, XMM13d,
                             XMM14, XMM14b, XMM14c, XMM14d,
                             XMM15, XMM15b, XMM15c, XMM15d
#endif
                             );

// Class for evex 128bit vector registers
reg_class vectorx_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,
                           XMM1,  XMM1b,  XMM1c,  XMM1d,
                           XMM2,  XMM2b,  XMM2c,  XMM2d,
                           XMM3,  XMM3b,  XMM3c,  XMM3d,
                           XMM4,  XMM4b,  XMM4c,  XMM4d,
                           XMM5,  XMM5b,  XMM5c,  XMM5d,
                           XMM6,  XMM6b,  XMM6c,  XMM6d,
                           XMM7,  XMM7b,  XMM7c,  XMM7d
#ifdef _LP64
                          ,XMM8,  XMM8b,  XMM8c,  XMM8d,
                           XMM9,  XMM9b,  XMM9c,  XMM9d,
                           XMM10, XMM10b, XMM10c, XMM10d,
                           XMM11, XMM11b, XMM11c, XMM11d,
                           XMM12, XMM12b, XMM12c, XMM12d,
                           XMM13, XMM13b, XMM13c, XMM13d,
                           XMM14, XMM14b, XMM14c, XMM14d,
                           XMM15, XMM15b, XMM15c, XMM15d,
                           XMM16, XMM16b, XMM16c, XMM16d,
                           XMM17, XMM17b, XMM17c, XMM17d,
                           XMM18, XMM18b, XMM18c, XMM18d,
                           XMM19, XMM19b, XMM19c, XMM19d,
                           XMM20, XMM20b, XMM20c, XMM20d,
                           XMM21, XMM21b, XMM21c, XMM21d,
                           XMM22, XMM22b, XMM22c, XMM22d,
                           XMM23, XMM23b, XMM23c, XMM23d,
                           XMM24, XMM24b, XMM24c, XMM24d,
                           XMM25, XMM25b, XMM25c, XMM25d,
                           XMM26, XMM26b, XMM26c, XMM26d,
                           XMM27, XMM27b, XMM27c, XMM27d,
                           XMM28, XMM28b, XMM28c, XMM28d,
                           XMM29, XMM29b, XMM29c, XMM29d,
                           XMM30, XMM30b, XMM30c, XMM30d,
                           XMM31, XMM31b, XMM31c, XMM31d
#endif
                           );

reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for pre evex 256bit vector registers
reg_class vectory_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
                             XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
                             XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
                             XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
                             XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
                             XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
                             XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
                             XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
#ifdef _LP64
                            ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
                             XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
                             XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                             XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                             XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                             XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                             XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                             XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                             );

// Class for evex 256bit vector registers
reg_class vectory_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
                           XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
                           XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
                           XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
                           XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
                           XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
                           XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
                           XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
#ifdef _LP64
                          ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
                           XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
                           XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
                           XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
                           XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h
#endif
                           );

reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for all 512bit vector registers
reg_class vectorz_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
                      XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
                      XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
                      XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
                      XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
                      XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
                      XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
                      XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p
#ifdef _LP64
                     ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
                      XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
                      XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                      XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                      XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                      XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                      XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                      XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                     ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                      XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                      XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                      XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                      XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                      XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                      XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                      XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                      XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                      XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                      XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                      XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                      XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                      XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                      XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                      XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                      );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description

source_hpp %{
// Header information of the source block.
// Method declarations/definitions which are used outside
// the ad-scope can conveniently be defined here.
//
// To keep related declarations/definitions/uses close together,
// we switch between source %{ }% and source_hpp %{ }% freely as needed.

class NativeJump;

class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};

class HandlerImpl {

 public:

  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  static uint size_exception_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return NativeJump::instruction_size;
  }

#ifdef _LP64
  static uint size_deopt_handler() {
    // three 5 byte instructions
    return 15;
  }
#else
  static uint size_deopt_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return 5 + NativeJump::instruction_size; // pushl(); jmp;
  }
#endif
};

%} // end source_hpp

source %{

// Emit exception handler code.
// Stuff framesize into a register and call a VM stub routine.
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();
  __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}

// Emit deopt handler code.
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
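  // The handler emitted below materializes its own pc on the stack
  // (without clobbering any register, since all of them may be live) and
  // then jumps to the deopt blob's unpack entry. On 64-bit this is the
  // "three 5 byte instructions" that size_deopt_handler() accounts for:
  // the call, the subptr adjusting [rsp], and the final jump.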
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();

#ifdef _LP64
  address the_pc = (address) __ pc();
  Label next;
  // push a "the_pc" on the stack without destroying any registers
  // as they all may be live.

  // push address of "next"
  __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
  __ bind(next);
  // adjust it so it matches "the_pc"
  __ subptr(Address(rsp, 0), __ offset() - offset);
#else
  InternalAddress here(__ pc());
  __ pushptr(here.addr());
#endif

  __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
  __ end_a_stub();
  return offset;
}


//=============================================================================

// Float masks come from different places depending on platform.
#ifdef _LP64
static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
#else
static address float_signmask()  { return (address)float_signmask_pool; }
static address float_signflip()  { return (address)float_signflip_pool; }
static address double_signmask() { return (address)double_signmask_pool; }
static address double_signflip() { return (address)double_signflip_pool; }
#endif


const bool Matcher::match_rule_supported(int opcode) {
  if (!has_match_rule(opcode))
    return false;

  bool ret_value = true;
  switch (opcode) {
    case Op_PopCountI:
    case Op_PopCountL:
      if (!UsePopCountInstruction)
        ret_value = false;
      break;
    case Op_MulVI:
      if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
        ret_value = false;
      break;
    case Op_MulVL:
    case Op_MulReductionVL:
      if (!VM_Version::supports_avx512dq())
        ret_value = false;
      break;
    case Op_AddReductionVL:
      if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here
        ret_value = false;
      break;
    case Op_AddReductionVI:
      if (UseSSE < 3) // requires at least SSE3
        ret_value = false;
      break;
    case Op_MulReductionVI:
      if (UseSSE < 4) // requires at least SSE4
        ret_value = false;
      break;
    case Op_AddReductionVF:
    case Op_AddReductionVD:
    case Op_MulReductionVF:
    case Op_MulReductionVD:
      if (UseSSE < 1) // requires at least SSE
        ret_value = false;
      break;
    case Op_SqrtVD:
      if (UseAVX < 1) // enabled for AVX only
        ret_value = false;
      break;
    case Op_CompareAndSwapL:
#ifdef _LP64
    case Op_CompareAndSwapP:
#endif
      if (!VM_Version::supports_cx8())
        ret_value = false;
      break;
    case Op_CMoveVD:
      if (UseAVX < 1 || UseAVX > 2)
        ret_value = false;
      break;
    case Op_StrIndexOf:
      if (!UseSSE42Intrinsics)
        ret_value = false;
      break;
    case Op_StrIndexOfChar:
      if (!(UseSSE > 4))
        ret_value = false;
      break;
  }

  return ret_value;  // Per default match rules are supported.
}

const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
  // identify extra cases that we might want to provide match rules for
  // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
  bool ret_value = match_rule_supported(opcode);
  if (ret_value) {
    switch (opcode) {
      case Op_AddVB:
      case Op_SubVB:
        if ((vlen == 64) && !VM_Version::supports_avx512bw())
          ret_value = false;
        break;
      case Op_URShiftVS:
      case Op_RShiftVS:
      case Op_LShiftVS:
      case Op_MulVS:
      case Op_AddVS:
      case Op_SubVS:
        if ((vlen == 32) && !VM_Version::supports_avx512bw())
          ret_value = false;
        break;
      case Op_CMoveVD:
        if (vlen != 4)
          ret_value = false;
        break;
    }
  }

  return ret_value;  // Per default match rules are supported.
}

const bool Matcher::has_predicated_vectors(void) {
  bool ret_value = false;
  switch (UseAVX) {
    case 0:
    case 1:
    case 2:
      break;

    case 3:
      ret_value = VM_Version::supports_avx512vl();
      break;
  }

  return ret_value;
}

const int Matcher::float_pressure(int default_pressure_threshold) {
  int float_pressure_threshold = default_pressure_threshold;
#ifdef _LP64
  if (UseAVX > 2) {
    // Increase pressure threshold on machines with AVX3 which have
    // 2x more XMM registers.
    float_pressure_threshold = default_pressure_threshold * 2;
  }
#endif
  return float_pressure_threshold;
}

// Max vector size in bytes. 0 if not supported.
const int Matcher::vector_width_in_bytes(BasicType bt) {
  assert(is_java_primitive(bt), "only primitive type vectors");
  if (UseSSE < 2) return 0;
  // SSE2 supports 128bit vectors for all types.
  // AVX2 supports 256bit vectors for all types.
  // AVX-512/EVEX supports 512bit vectors for all types.
  int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
  // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
  if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
    size = (UseAVX > 2) ? 64 : 32;
  // Use flag to limit vector size.
  size = MIN2(size, (int)MaxVectorSize);
  // Minimum 2 values in vector (or 4 for bytes).
  switch (bt) {
    case T_DOUBLE:
    case T_LONG:
      if (size < 16) return 0;
      break;
    case T_FLOAT:
    case T_INT:
      if (size < 8) return 0;
      break;
    case T_BOOLEAN:
      if (size < 4) return 0;
      break;
    case T_CHAR:
      if (size < 4) return 0;
      break;
    case T_BYTE:
      if (size < 4) return 0;
      break;
    case T_SHORT:
      if (size < 4) return 0;
      break;
    default:
      ShouldNotReachHere();
  }
  return size;
}

// Limits on vector size (number of elements) loaded into vector.
const int Matcher::max_vector_size(const BasicType bt) {
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}
const int Matcher::min_vector_size(const BasicType bt) {
  int max_size = max_vector_size(bt);
  // Min size which can be loaded into vector is 4 bytes.
  int size = (type2aelembytes(bt) == 1) ? 4 : 2;
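  // In other words: byte vectors need at least 4 elements and all wider
  // element types at least 2 (each at least 4 bytes of data), never
  // exceeding the maximum vector size for the type.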
  return MIN2(size, max_size);
}

// Vector ideal reg corresponding to specified size in bytes
const int Matcher::vector_ideal_reg(int size) {
  assert(MaxVectorSize >= size, "");
  switch (size) {
    case  4: return Op_VecS;
    case  8: return Op_VecD;
    case 16: return Op_VecX;
    case 32: return Op_VecY;
    case 64: return Op_VecZ;
  }
  ShouldNotReachHere();
  return 0;
}

// Only lowest bits of xmm reg are used for vector shift count.
const int Matcher::vector_shift_count_ideal_reg(int size) {
  return Op_VecS;
}

// x86 supports misaligned vectors store/load.
const bool Matcher::misaligned_vectors_ok() {
  return !AlignVector; // can be changed by flag
}

// x86 AES instructions are compatible with SunJCE expanded
// keys, hence we do not need to pass the original key to stubs
const bool Matcher::pass_original_key_for_aes() {
  return false;
}

// Helper methods for MachSpillCopyNode::implementation().
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st) {
  // In 64-bit VM size calculation is very complex. Emitting instructions
  // into scratch buffer is used to get size in 64-bit VM.
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
  assert(ireg == Op_VecS || // 32bit vector
         (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
         (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
         "no non-adjacent vector moves" );
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    switch (ireg) {
      case Op_VecS: // copy whole register
      case Op_VecD:
      case Op_VecX:
        __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
        break;
      case Op_VecY:
        __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
        break;
      case Op_VecZ:
        __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
        break;
      default:
        ShouldNotReachHere();
    }
    int size = __ offset() - offset;
#ifdef ASSERT
    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    assert(!do_size || size == 4, "incorrect size calculation");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    switch (ireg) {
      case Op_VecS:
      case Op_VecD:
      case Op_VecX:
        st->print("movdqu %s,%s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
        break;
      case Op_VecY:
      case Op_VecZ:
        st->print("vmovdqu %s,%s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
        break;
      default:
        ShouldNotReachHere();
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  return (UseAVX > 2) ? 6 : 4;
}

static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
                            int stack_offset, int reg, uint ireg, outputStream* st) {
  // In 64-bit VM size calculation is very complex. Emitting instructions
  // into scratch buffer is used to get size in 64-bit VM.
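  // Like vec_mov_helper() above, this helper serves three callers: with a
  // CodeBuffer it emits the actual load/store, with do_size set (32-bit VM
  // only) it returns the emitted size, and otherwise it just prints the
  // instruction for debugging output.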
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    if (is_load) {
      switch (ireg) {
        case Op_VecS:
          __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
          break;
        case Op_VecD:
          __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
          break;
        case Op_VecX:
          __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
          break;
        case Op_VecY:
          __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
          break;
        case Op_VecZ:
          __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
          break;
        default:
          ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
        case Op_VecS:
          __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
          break;
        case Op_VecD:
          __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
          break;
        case Op_VecX:
          __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
          break;
        case Op_VecY:
          __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
          break;
        case Op_VecZ:
          __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
          break;
        default:
          ShouldNotReachHere();
      }
    }
    int size = __ offset() - offset;
#ifdef ASSERT
    int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4);
    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
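    // For example, a spill slot at [rsp + 0x40] encodes with a one-byte
    // displacement, while [rsp + 0x100] needs a four-byte displacement
    // (presumably the value 6 above additionally folds in the two extra
    // EVEX prefix bytes when UseAVX > 2).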
    assert(!do_size || size == (5+offset_size), "incorrect size calculation");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    if (is_load) {
      switch (ireg) {
        case Op_VecS:
          st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
          break;
        case Op_VecD:
          st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
          break;
        case Op_VecX:
          st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
          break;
        case Op_VecY:
        case Op_VecZ:
          st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
          break;
        default:
          ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
        case Op_VecS:
          st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
          break;
        case Op_VecD:
          st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
          break;
        case Op_VecX:
          st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
          break;
        case Op_VecY:
        case Op_VecZ:
          st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
          break;
        default:
          ShouldNotReachHere();
      }
    }
#endif
  }
  bool is_single_byte = false;
  int vec_len = 0;
  if ((UseAVX > 2) && (stack_offset != 0)) {
    int tuple_type = Assembler::EVEX_FVM;
    int input_size = Assembler::EVEX_32bit;
    switch (ireg) {
      case Op_VecS:
        tuple_type = Assembler::EVEX_T1S;
        break;
      case Op_VecD:
        tuple_type = Assembler::EVEX_T1S;
        input_size = Assembler::EVEX_64bit;
        break;
      case Op_VecX:
        break;
      case Op_VecY:
        vec_len = 1;
        break;
      case Op_VecZ:
        vec_len = 2;
        break;
    }
    is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0);
  }
  int offset_size = 0;
  int size = 5;
  if (UseAVX > 2) {
    if (VM_Version::supports_avx512novl() && (vec_len == 2)) {
      offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
      size += 2; // Need an additional two bytes for EVEX encoding
    } else if (VM_Version::supports_avx512novl() && (vec_len < 2)) {
      offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
    } else {
      offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
      size += 2; // Need an additional two bytes for EVEX encoding
    }
  } else {
    offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
  }
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+offset_size;
}

static inline jfloat replicate4_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 32bit.
  assert(width == 1 || width == 2, "only byte or short types here");
  int bit_width = width * 8;
  jint val = con;
  val &= (1 << bit_width) - 1;  // mask off sign bits
  while (bit_width < 32) {
    val |= (val << bit_width);
    bit_width <<= 1;
  }
  jfloat fval = *((jfloat*) &val);  // coerce to float type
  return fval;
}

static inline jdouble replicate8_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
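  // For example, replicate8_imm(0xAB, 1) produces the bit pattern
  // 0xABABABABABABABAB, handed back reinterpreted as a jdouble so it can
  // be placed in the constant table.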
  assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
  int bit_width = width * 8;
  jlong val = con;
  val &= (((jlong) 1) << bit_width) - 1;  // mask off sign bits
  while (bit_width < 64) {
    val |= (val << bit_width);
    bit_width <<= 1;
  }
  jdouble dval = *((jdouble*) &val);  // coerce to double type
  return dval;
}

#ifndef PRODUCT
void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("nop \t# %d bytes pad for loops and calls", _count);
}
#endif

void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
  MacroAssembler _masm(&cbuf);
  __ nop(_count);
}

uint MachNopNode::size(PhaseRegAlloc*) const {
  return _count;
}

void MachMskNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
  MacroAssembler _masm(&cbuf);
  __ restoremsk();
}

uint MachMskNode::size(PhaseRegAlloc* ra_) const {
  return MachNode::size(ra_);
}

#ifndef PRODUCT
void MachMskNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("restoremsk \t# mask restore for loops");
}
#endif

#ifndef PRODUCT
void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("# breakpoint");
}
#endif

void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const {
  MacroAssembler _masm(&cbuf);
  __ int3();
}

uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
  return MachNode::size(ra_);
}

%}

encode %{

  enc_class call_epilog %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      MacroAssembler _masm(&cbuf);
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
  %}

%}


//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

// This one generically applies only for evex, so only one version
operand vecZ() %{
  constraint(ALLOC_IN_RC(vectorz_reg));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}

// Comparison Code for FP conditional move
operand cmpOp_vcmppd() %{
  match(Bool);

  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0, "eq");
    less         (0x1, "lt");
    less_equal   (0x2, "le");
    not_equal    (0xC, "ne");
    greater_equal(0xD, "ge");
    greater      (0xE, "gt");
    //TODO cannot compile (adlc breaks) without two next lines with error:
    // x86_64.ad(13987) Syntax Error: :In operand cmpOp_vcmppd: Do not support this encode constant: ' %{
    // equal' for overflow.
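    // Note that the predicate above already rejects overflow/no_overflow
    // tests, so the two encodings below exist only to satisfy the ADLC
    // and should never actually be emitted.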
2187 overflow (0x20, "o"); // not really supported by the instruction 2188 no_overflow (0x21, "no"); // not really supported by the instruction 2189 %} 2190 %} 2191 2192 2193 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 2194 2195 // ============================================================================ 2196 2197 instruct ShouldNotReachHere() %{ 2198 match(Halt); 2199 format %{ "int3\t# ShouldNotReachHere" %} 2200 ins_encode %{ 2201 __ int3(); 2202 %} 2203 ins_pipe(pipe_slow); 2204 %} 2205 2206 // =================================EVEX special=============================== 2207 2208 instruct set_mask(rRegI dst, rRegI src) %{ 2209 predicate(VM_Version::supports_avx512vl()); 2210 match(Set dst (MaskCreateI src)); 2211 effect(TEMP dst); 2212 format %{ "createmsk $dst, $src" %} 2213 ins_encode %{ 2214 __ createmsk($dst$$Register, $src$$Register); 2215 %} 2216 ins_pipe(pipe_slow); 2217 %} 2218 2219 // ============================================================================ 2220 2221 instruct addF_reg(regF dst, regF src) %{ 2222 predicate((UseSSE>=1) && (UseAVX == 0)); 2223 match(Set dst (AddF dst src)); 2224 2225 format %{ "addss $dst, $src" %} 2226 ins_cost(150); 2227 ins_encode %{ 2228 __ addss($dst$$XMMRegister, $src$$XMMRegister); 2229 %} 2230 ins_pipe(pipe_slow); 2231 %} 2232 2233 instruct addF_mem(regF dst, memory src) %{ 2234 predicate((UseSSE>=1) && (UseAVX == 0)); 2235 match(Set dst (AddF dst (LoadF src))); 2236 2237 format %{ "addss $dst, $src" %} 2238 ins_cost(150); 2239 ins_encode %{ 2240 __ addss($dst$$XMMRegister, $src$$Address); 2241 %} 2242 ins_pipe(pipe_slow); 2243 %} 2244 2245 instruct addF_imm(regF dst, immF con) %{ 2246 predicate((UseSSE>=1) && (UseAVX == 0)); 2247 match(Set dst (AddF dst con)); 2248 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2249 ins_cost(150); 2250 ins_encode %{ 2251 __ addss($dst$$XMMRegister, $constantaddress($con)); 2252 %} 2253 ins_pipe(pipe_slow); 2254 %} 2255 2256 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 2257 predicate(UseAVX > 0); 2258 match(Set dst (AddF src1 src2)); 2259 2260 format %{ "vaddss $dst, $src1, $src2" %} 2261 ins_cost(150); 2262 ins_encode %{ 2263 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2264 %} 2265 ins_pipe(pipe_slow); 2266 %} 2267 2268 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 2269 predicate(UseAVX > 0); 2270 match(Set dst (AddF src1 (LoadF src2))); 2271 2272 format %{ "vaddss $dst, $src1, $src2" %} 2273 ins_cost(150); 2274 ins_encode %{ 2275 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2276 %} 2277 ins_pipe(pipe_slow); 2278 %} 2279 2280 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 2281 predicate(UseAVX > 0); 2282 match(Set dst (AddF src con)); 2283 2284 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2285 ins_cost(150); 2286 ins_encode %{ 2287 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2288 %} 2289 ins_pipe(pipe_slow); 2290 %} 2291 2292 instruct addD_reg(regD dst, regD src) %{ 2293 predicate((UseSSE>=2) && (UseAVX == 0)); 2294 match(Set dst (AddD dst src)); 2295 2296 format %{ "addsd $dst, $src" %} 2297 ins_cost(150); 2298 ins_encode %{ 2299 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 2300 %} 2301 ins_pipe(pipe_slow); 2302 %} 2303 2304 instruct addD_mem(regD dst, memory src) %{ 2305 predicate((UseSSE>=2) && (UseAVX == 0)); 2306 match(Set dst (AddD dst (LoadD src))); 2307 2308 
  format %{ "addsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AddD dst con));
  format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src1 src2));

  format %{ "vaddsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src1 (LoadD src2)));

  format %{ "vaddsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src con));

  format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (SubF dst src));

  format %{ "subss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (SubF dst (LoadF src)));

  format %{ "subss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (SubF dst con));
  format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 src2));

  format %{ "vsubss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 (LoadF src2)));

  format %{ "vsubss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src con));

  format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
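    // (In ins_encode blocks like this one the ADLC substitutes the concrete
    // operands: $dst$$XMMRegister becomes the allocated XMM register and
    // $constantaddress($con) the constant-table slot for the immediate.)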
__ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2430 %} 2431 ins_pipe(pipe_slow); 2432 %} 2433 2434 instruct subD_reg(regD dst, regD src) %{ 2435 predicate((UseSSE>=2) && (UseAVX == 0)); 2436 match(Set dst (SubD dst src)); 2437 2438 format %{ "subsd $dst, $src" %} 2439 ins_cost(150); 2440 ins_encode %{ 2441 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 2442 %} 2443 ins_pipe(pipe_slow); 2444 %} 2445 2446 instruct subD_mem(regD dst, memory src) %{ 2447 predicate((UseSSE>=2) && (UseAVX == 0)); 2448 match(Set dst (SubD dst (LoadD src))); 2449 2450 format %{ "subsd $dst, $src" %} 2451 ins_cost(150); 2452 ins_encode %{ 2453 __ subsd($dst$$XMMRegister, $src$$Address); 2454 %} 2455 ins_pipe(pipe_slow); 2456 %} 2457 2458 instruct subD_imm(regD dst, immD con) %{ 2459 predicate((UseSSE>=2) && (UseAVX == 0)); 2460 match(Set dst (SubD dst con)); 2461 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2462 ins_cost(150); 2463 ins_encode %{ 2464 __ subsd($dst$$XMMRegister, $constantaddress($con)); 2465 %} 2466 ins_pipe(pipe_slow); 2467 %} 2468 2469 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 2470 predicate(UseAVX > 0); 2471 match(Set dst (SubD src1 src2)); 2472 2473 format %{ "vsubsd $dst, $src1, $src2" %} 2474 ins_cost(150); 2475 ins_encode %{ 2476 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2477 %} 2478 ins_pipe(pipe_slow); 2479 %} 2480 2481 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 2482 predicate(UseAVX > 0); 2483 match(Set dst (SubD src1 (LoadD src2))); 2484 2485 format %{ "vsubsd $dst, $src1, $src2" %} 2486 ins_cost(150); 2487 ins_encode %{ 2488 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2489 %} 2490 ins_pipe(pipe_slow); 2491 %} 2492 2493 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 2494 predicate(UseAVX > 0); 2495 match(Set dst (SubD src con)); 2496 2497 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2498 ins_cost(150); 2499 ins_encode %{ 2500 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2501 %} 2502 ins_pipe(pipe_slow); 2503 %} 2504 2505 instruct mulF_reg(regF dst, regF src) %{ 2506 predicate((UseSSE>=1) && (UseAVX == 0)); 2507 match(Set dst (MulF dst src)); 2508 2509 format %{ "mulss $dst, $src" %} 2510 ins_cost(150); 2511 ins_encode %{ 2512 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 2513 %} 2514 ins_pipe(pipe_slow); 2515 %} 2516 2517 instruct mulF_mem(regF dst, memory src) %{ 2518 predicate((UseSSE>=1) && (UseAVX == 0)); 2519 match(Set dst (MulF dst (LoadF src))); 2520 2521 format %{ "mulss $dst, $src" %} 2522 ins_cost(150); 2523 ins_encode %{ 2524 __ mulss($dst$$XMMRegister, $src$$Address); 2525 %} 2526 ins_pipe(pipe_slow); 2527 %} 2528 2529 instruct mulF_imm(regF dst, immF con) %{ 2530 predicate((UseSSE>=1) && (UseAVX == 0)); 2531 match(Set dst (MulF dst con)); 2532 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2533 ins_cost(150); 2534 ins_encode %{ 2535 __ mulss($dst$$XMMRegister, $constantaddress($con)); 2536 %} 2537 ins_pipe(pipe_slow); 2538 %} 2539 2540 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 2541 predicate(UseAVX > 0); 2542 match(Set dst (MulF src1 src2)); 2543 2544 format %{ "vmulss $dst, $src1, $src2" %} 2545 ins_cost(150); 2546 ins_encode %{ 2547 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2548 %} 2549 ins_pipe(pipe_slow); 2550 %} 2551 2552 instruct mulF_reg_mem(regF dst, regF 
src1, memory src2) %{ 2553 predicate(UseAVX > 0); 2554 match(Set dst (MulF src1 (LoadF src2))); 2555 2556 format %{ "vmulss $dst, $src1, $src2" %} 2557 ins_cost(150); 2558 ins_encode %{ 2559 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2560 %} 2561 ins_pipe(pipe_slow); 2562 %} 2563 2564 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 2565 predicate(UseAVX > 0); 2566 match(Set dst (MulF src con)); 2567 2568 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2569 ins_cost(150); 2570 ins_encode %{ 2571 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2572 %} 2573 ins_pipe(pipe_slow); 2574 %} 2575 2576 instruct mulD_reg(regD dst, regD src) %{ 2577 predicate((UseSSE>=2) && (UseAVX == 0)); 2578 match(Set dst (MulD dst src)); 2579 2580 format %{ "mulsd $dst, $src" %} 2581 ins_cost(150); 2582 ins_encode %{ 2583 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 2584 %} 2585 ins_pipe(pipe_slow); 2586 %} 2587 2588 instruct mulD_mem(regD dst, memory src) %{ 2589 predicate((UseSSE>=2) && (UseAVX == 0)); 2590 match(Set dst (MulD dst (LoadD src))); 2591 2592 format %{ "mulsd $dst, $src" %} 2593 ins_cost(150); 2594 ins_encode %{ 2595 __ mulsd($dst$$XMMRegister, $src$$Address); 2596 %} 2597 ins_pipe(pipe_slow); 2598 %} 2599 2600 instruct mulD_imm(regD dst, immD con) %{ 2601 predicate((UseSSE>=2) && (UseAVX == 0)); 2602 match(Set dst (MulD dst con)); 2603 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2604 ins_cost(150); 2605 ins_encode %{ 2606 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 2607 %} 2608 ins_pipe(pipe_slow); 2609 %} 2610 2611 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 2612 predicate(UseAVX > 0); 2613 match(Set dst (MulD src1 src2)); 2614 2615 format %{ "vmulsd $dst, $src1, $src2" %} 2616 ins_cost(150); 2617 ins_encode %{ 2618 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2619 %} 2620 ins_pipe(pipe_slow); 2621 %} 2622 2623 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 2624 predicate(UseAVX > 0); 2625 match(Set dst (MulD src1 (LoadD src2))); 2626 2627 format %{ "vmulsd $dst, $src1, $src2" %} 2628 ins_cost(150); 2629 ins_encode %{ 2630 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2631 %} 2632 ins_pipe(pipe_slow); 2633 %} 2634 2635 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 2636 predicate(UseAVX > 0); 2637 match(Set dst (MulD src con)); 2638 2639 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2640 ins_cost(150); 2641 ins_encode %{ 2642 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2643 %} 2644 ins_pipe(pipe_slow); 2645 %} 2646 2647 instruct divF_reg(regF dst, regF src) %{ 2648 predicate((UseSSE>=1) && (UseAVX == 0)); 2649 match(Set dst (DivF dst src)); 2650 2651 format %{ "divss $dst, $src" %} 2652 ins_cost(150); 2653 ins_encode %{ 2654 __ divss($dst$$XMMRegister, $src$$XMMRegister); 2655 %} 2656 ins_pipe(pipe_slow); 2657 %} 2658 2659 instruct divF_mem(regF dst, memory src) %{ 2660 predicate((UseSSE>=1) && (UseAVX == 0)); 2661 match(Set dst (DivF dst (LoadF src))); 2662 2663 format %{ "divss $dst, $src" %} 2664 ins_cost(150); 2665 ins_encode %{ 2666 __ divss($dst$$XMMRegister, $src$$Address); 2667 %} 2668 ins_pipe(pipe_slow); 2669 %} 2670 2671 instruct divF_imm(regF dst, immF con) %{ 2672 predicate((UseSSE>=1) && (UseAVX == 0)); 2673 match(Set dst (DivF dst con)); 2674 format %{ "divss $dst, 
[$constantaddress]\t# load from constant table: float=$con" %} 2675 ins_cost(150); 2676 ins_encode %{ 2677 __ divss($dst$$XMMRegister, $constantaddress($con)); 2678 %} 2679 ins_pipe(pipe_slow); 2680 %} 2681 2682 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 2683 predicate(UseAVX > 0); 2684 match(Set dst (DivF src1 src2)); 2685 2686 format %{ "vdivss $dst, $src1, $src2" %} 2687 ins_cost(150); 2688 ins_encode %{ 2689 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2690 %} 2691 ins_pipe(pipe_slow); 2692 %} 2693 2694 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 2695 predicate(UseAVX > 0); 2696 match(Set dst (DivF src1 (LoadF src2))); 2697 2698 format %{ "vdivss $dst, $src1, $src2" %} 2699 ins_cost(150); 2700 ins_encode %{ 2701 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2702 %} 2703 ins_pipe(pipe_slow); 2704 %} 2705 2706 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 2707 predicate(UseAVX > 0); 2708 match(Set dst (DivF src con)); 2709 2710 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2711 ins_cost(150); 2712 ins_encode %{ 2713 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2714 %} 2715 ins_pipe(pipe_slow); 2716 %} 2717 2718 instruct divD_reg(regD dst, regD src) %{ 2719 predicate((UseSSE>=2) && (UseAVX == 0)); 2720 match(Set dst (DivD dst src)); 2721 2722 format %{ "divsd $dst, $src" %} 2723 ins_cost(150); 2724 ins_encode %{ 2725 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 2726 %} 2727 ins_pipe(pipe_slow); 2728 %} 2729 2730 instruct divD_mem(regD dst, memory src) %{ 2731 predicate((UseSSE>=2) && (UseAVX == 0)); 2732 match(Set dst (DivD dst (LoadD src))); 2733 2734 format %{ "divsd $dst, $src" %} 2735 ins_cost(150); 2736 ins_encode %{ 2737 __ divsd($dst$$XMMRegister, $src$$Address); 2738 %} 2739 ins_pipe(pipe_slow); 2740 %} 2741 2742 instruct divD_imm(regD dst, immD con) %{ 2743 predicate((UseSSE>=2) && (UseAVX == 0)); 2744 match(Set dst (DivD dst con)); 2745 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2746 ins_cost(150); 2747 ins_encode %{ 2748 __ divsd($dst$$XMMRegister, $constantaddress($con)); 2749 %} 2750 ins_pipe(pipe_slow); 2751 %} 2752 2753 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 2754 predicate(UseAVX > 0); 2755 match(Set dst (DivD src1 src2)); 2756 2757 format %{ "vdivsd $dst, $src1, $src2" %} 2758 ins_cost(150); 2759 ins_encode %{ 2760 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2761 %} 2762 ins_pipe(pipe_slow); 2763 %} 2764 2765 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 2766 predicate(UseAVX > 0); 2767 match(Set dst (DivD src1 (LoadD src2))); 2768 2769 format %{ "vdivsd $dst, $src1, $src2" %} 2770 ins_cost(150); 2771 ins_encode %{ 2772 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2773 %} 2774 ins_pipe(pipe_slow); 2775 %} 2776 2777 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 2778 predicate(UseAVX > 0); 2779 match(Set dst (DivD src con)); 2780 2781 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2782 ins_cost(150); 2783 ins_encode %{ 2784 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2785 %} 2786 ins_pipe(pipe_slow); 2787 %} 2788 2789 instruct absF_reg(regF dst) %{ 2790 predicate((UseSSE>=1) && (UseAVX == 0)); 2791 match(Set dst (AbsF dst)); 2792 ins_cost(150); 2793 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" 
%}
  ins_encode %{
    __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absF_reg_reg(regF dst, regF src) %{
  predicate(VM_Version::supports_avxonly());
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

#ifdef _LP64
instruct absF_reg_reg_evex(regF dst, regF src) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

instruct absF_reg_reg_evex_special(regF dst, regF src1, regF src2) %{
  predicate(VM_Version::supports_avx512novl());
  match(Set dst (AbsF src1));
  effect(TEMP src2);
  ins_cost(150);
  format %{ "vabsss $dst, $src1, $src2, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vabsss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
#else // _LP64
instruct absF_reg_reg_evex(regF dst, regF src) %{
  predicate(UseAVX > 2);
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
#endif

instruct absD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AbsD dst));
  ins_cost(150);
  format %{ "andpd $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg_reg(regD dst, regD src) %{
  predicate(VM_Version::supports_avxonly());
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

#ifdef _LP64
instruct absD_reg_reg_evex(regD dst, regD src) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg_reg_evex_special(regD dst, regD src1, regD src2) %{
  predicate(VM_Version::supports_avx512novl());
  match(Set dst (AbsD src1));
  effect(TEMP src2);
  ins_cost(150);
  format %{ "vabssd $dst, $src1, $src2, [0x7fffffffffffffff]\t# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vabssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
#else // _LP64
instruct absD_reg_reg_evex(regD dst, regD src) %{
  predicate(UseAVX > 2);
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
#endif
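// The sign-mask/sign-flip idiom used by the Abs rules above and the Neg rules
// below, sketched in plain C for reference (illustrative only, not part of
// the build; helper names are hypothetical):
//
//   #include <stdint.h>
//   #include <string.h>
//
//   static float abs_by_sign_mask(float x) {
//     uint32_t bits;
//     memcpy(&bits, &x, sizeof bits);
//     bits &= UINT32_C(0x7fffffff);   // andps: clear the IEEE-754 sign bit
//     memcpy(&x, &bits, sizeof x);
//     return x;
//   }
//
//   static float neg_by_sign_flip(float x) {
//     uint32_t bits;
//     memcpy(&bits, &x, sizeof bits);
//     bits ^= UINT32_C(0x80000000);   // xorps: toggle the IEEE-754 sign bit
//     memcpy(&x, &bits, sizeof x);
//     return x;
//   }
//
// The double rules use the same trick with the 64-bit constants
// 0x7fffffffffffffff (andpd) and 0x8000000000000000 (xorpd).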
instruct negF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (NegF dst));
  ins_cost(150);
  format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negF_reg_reg(regF dst, regF src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegF src));
  ins_cost(150);
  format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (NegD dst));
  ins_cost(150);
  format %{ "xorpd $dst, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg_reg(regD dst, regD src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegD src));
  ins_cost(150);
  format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_reg(regF dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_mem(regF dst, memory src) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_imm(regF dst, immF con) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D con))));
  format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_reg(regD dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD src));

  format %{ "sqrtsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_mem(regD dst, memory src) %{
  predicate(UseSSE>=2);
  match(Set dst
(SqrtD (LoadD src))); 3023 3024 format %{ "sqrtsd $dst, $src" %} 3025 ins_cost(150); 3026 ins_encode %{ 3027 __ sqrtsd($dst$$XMMRegister, $src$$Address); 3028 %} 3029 ins_pipe(pipe_slow); 3030 %} 3031 3032 instruct sqrtD_imm(regD dst, immD con) %{ 3033 predicate(UseSSE>=2); 3034 match(Set dst (SqrtD con)); 3035 format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3036 ins_cost(150); 3037 ins_encode %{ 3038 __ sqrtsd($dst$$XMMRegister, $constantaddress($con)); 3039 %} 3040 ins_pipe(pipe_slow); 3041 %} 3042 3043 // ====================VECTOR INSTRUCTIONS===================================== 3044 3045 // Load vectors (4 bytes long) 3046 instruct loadV4(vecS dst, memory mem) %{ 3047 predicate(n->as_LoadVector()->memory_size() == 4); 3048 match(Set dst (LoadVector mem)); 3049 ins_cost(125); 3050 format %{ "movd $dst,$mem\t! load vector (4 bytes)" %} 3051 ins_encode %{ 3052 __ movdl($dst$$XMMRegister, $mem$$Address); 3053 %} 3054 ins_pipe( pipe_slow ); 3055 %} 3056 3057 // Load vectors (8 bytes long) 3058 instruct loadV8(vecD dst, memory mem) %{ 3059 predicate(n->as_LoadVector()->memory_size() == 8); 3060 match(Set dst (LoadVector mem)); 3061 ins_cost(125); 3062 format %{ "movq $dst,$mem\t! load vector (8 bytes)" %} 3063 ins_encode %{ 3064 __ movq($dst$$XMMRegister, $mem$$Address); 3065 %} 3066 ins_pipe( pipe_slow ); 3067 %} 3068 3069 // Load vectors (16 bytes long) 3070 instruct loadV16(vecX dst, memory mem) %{ 3071 predicate(n->as_LoadVector()->memory_size() == 16); 3072 match(Set dst (LoadVector mem)); 3073 ins_cost(125); 3074 format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %} 3075 ins_encode %{ 3076 __ movdqu($dst$$XMMRegister, $mem$$Address); 3077 %} 3078 ins_pipe( pipe_slow ); 3079 %} 3080 3081 // Load vectors (32 bytes long) 3082 instruct loadV32(vecY dst, memory mem) %{ 3083 predicate(n->as_LoadVector()->memory_size() == 32); 3084 match(Set dst (LoadVector mem)); 3085 ins_cost(125); 3086 format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %} 3087 ins_encode %{ 3088 __ vmovdqu($dst$$XMMRegister, $mem$$Address); 3089 %} 3090 ins_pipe( pipe_slow ); 3091 %} 3092 3093 // Load vectors (64 bytes long) 3094 instruct loadV64_dword(vecZ dst, memory mem) %{ 3095 predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() <= 4); 3096 match(Set dst (LoadVector mem)); 3097 ins_cost(125); 3098 format %{ "vmovdqul $dst k0,$mem\t! load vector (64 bytes)" %} 3099 ins_encode %{ 3100 int vector_len = 2; 3101 __ evmovdqul($dst$$XMMRegister, $mem$$Address, vector_len); 3102 %} 3103 ins_pipe( pipe_slow ); 3104 %} 3105 3106 // Load vectors (64 bytes long) 3107 instruct loadV64_qword(vecZ dst, memory mem) %{ 3108 predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() > 4); 3109 match(Set dst (LoadVector mem)); 3110 ins_cost(125); 3111 format %{ "vmovdquq $dst k0,$mem\t! load vector (64 bytes)" %} 3112 ins_encode %{ 3113 int vector_len = 2; 3114 __ evmovdquq($dst$$XMMRegister, $mem$$Address, vector_len); 3115 %} 3116 ins_pipe( pipe_slow ); 3117 %} 3118 3119 // Store vectors 3120 instruct storeV4(memory mem, vecS src) %{ 3121 predicate(n->as_StoreVector()->memory_size() == 4); 3122 match(Set mem (StoreVector mem src)); 3123 ins_cost(145); 3124 format %{ "movd $mem,$src\t! 
store vector (4 bytes)" %}
  ins_encode %{
    __ movdl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV8(memory mem, vecD src) %{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movq $mem,$src\t! store vector (8 bytes)" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV16(memory mem, vecX src) %{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %}
  ins_encode %{
    __ movdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV32(memory mem, vecY src) %{
  predicate(n->as_StoreVector()->memory_size() == 32);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV64_dword(memory mem, vecZ src) %{
  predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() <= 4);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdqul $mem k0,$src\t! store vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdqul($mem$$Address, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV64_qword(memory mem, vecZ src) %{
  predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() > 4);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdquq $mem k0,$src\t! store vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdquq($mem$$Address, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
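// Note on the vector load/store rules above: the matcher selects exactly one
// rule per LoadVector/StoreVector node based on memory_size() in bytes
// (4/8/16/32/64, i.e. vector length times element size). The 64-byte EVEX
// forms additionally dispatch on element_size(): elements of 4 bytes or less
// go through evmovdqul, larger elements through evmovdquq.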
replicate16B" %} 3221 ins_encode %{ 3222 __ movdl($dst$$XMMRegister, $src$$Register); 3223 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3224 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3225 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3226 %} 3227 ins_pipe( pipe_slow ); 3228 %} 3229 3230 instruct Repl16B_mem(vecX dst, memory mem) %{ 3231 predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3232 match(Set dst (ReplicateB (LoadB mem))); 3233 format %{ "punpcklbw $dst,$mem\n\t" 3234 "pshuflw $dst,$dst,0x00\n\t" 3235 "punpcklqdq $dst,$dst\t! replicate16B" %} 3236 ins_encode %{ 3237 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3238 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3239 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3240 %} 3241 ins_pipe( pipe_slow ); 3242 %} 3243 3244 instruct Repl32B(vecY dst, rRegI src) %{ 3245 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3246 match(Set dst (ReplicateB src)); 3247 format %{ "movd $dst,$src\n\t" 3248 "punpcklbw $dst,$dst\n\t" 3249 "pshuflw $dst,$dst,0x00\n\t" 3250 "punpcklqdq $dst,$dst\n\t" 3251 "vinserti128_high $dst,$dst\t! replicate32B" %} 3252 ins_encode %{ 3253 __ movdl($dst$$XMMRegister, $src$$Register); 3254 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3255 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3256 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3257 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3258 %} 3259 ins_pipe( pipe_slow ); 3260 %} 3261 3262 instruct Repl32B_mem(vecY dst, memory mem) %{ 3263 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3264 match(Set dst (ReplicateB (LoadB mem))); 3265 format %{ "punpcklbw $dst,$mem\n\t" 3266 "pshuflw $dst,$dst,0x00\n\t" 3267 "punpcklqdq $dst,$dst\n\t" 3268 "vinserti128_high $dst,$dst\t! replicate32B" %} 3269 ins_encode %{ 3270 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3271 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3272 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3273 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3274 %} 3275 ins_pipe( pipe_slow ); 3276 %} 3277 3278 instruct Repl16B_imm(vecX dst, immI con) %{ 3279 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3280 match(Set dst (ReplicateB con)); 3281 format %{ "movq $dst,[$constantaddress]\n\t" 3282 "punpcklqdq $dst,$dst\t! replicate16B($con)" %} 3283 ins_encode %{ 3284 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3285 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3286 %} 3287 ins_pipe( pipe_slow ); 3288 %} 3289 3290 instruct Repl32B_imm(vecY dst, immI con) %{ 3291 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3292 match(Set dst (ReplicateB con)); 3293 format %{ "movq $dst,[$constantaddress]\n\t" 3294 "punpcklqdq $dst,$dst\n\t" 3295 "vinserti128_high $dst,$dst\t! lreplicate32B($con)" %} 3296 ins_encode %{ 3297 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3298 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3299 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3300 %} 3301 ins_pipe( pipe_slow ); 3302 %} 3303 3304 instruct Repl4S(vecD dst, rRegI src) %{ 3305 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vlbw()); 3306 match(Set dst (ReplicateS src)); 3307 format %{ "movd $dst,$src\n\t" 3308 "pshuflw $dst,$dst,0x00\t! 
replicate4S" %} 3309 ins_encode %{ 3310 __ movdl($dst$$XMMRegister, $src$$Register); 3311 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3312 %} 3313 ins_pipe( pipe_slow ); 3314 %} 3315 3316 instruct Repl4S_mem(vecD dst, memory mem) %{ 3317 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3318 match(Set dst (ReplicateS (LoadS mem))); 3319 format %{ "pshuflw $dst,$mem,0x00\t! replicate4S" %} 3320 ins_encode %{ 3321 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3322 %} 3323 ins_pipe( pipe_slow ); 3324 %} 3325 3326 instruct Repl8S(vecX dst, rRegI src) %{ 3327 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw()); 3328 match(Set dst (ReplicateS src)); 3329 format %{ "movd $dst,$src\n\t" 3330 "pshuflw $dst,$dst,0x00\n\t" 3331 "punpcklqdq $dst,$dst\t! replicate8S" %} 3332 ins_encode %{ 3333 __ movdl($dst$$XMMRegister, $src$$Register); 3334 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3335 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3336 %} 3337 ins_pipe( pipe_slow ); 3338 %} 3339 3340 instruct Repl8S_mem(vecX dst, memory mem) %{ 3341 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3342 match(Set dst (ReplicateS (LoadS mem))); 3343 format %{ "pshuflw $dst,$mem,0x00\n\t" 3344 "punpcklqdq $dst,$dst\t! replicate8S" %} 3345 ins_encode %{ 3346 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3347 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3348 %} 3349 ins_pipe( pipe_slow ); 3350 %} 3351 3352 instruct Repl8S_imm(vecX dst, immI con) %{ 3353 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw()); 3354 match(Set dst (ReplicateS con)); 3355 format %{ "movq $dst,[$constantaddress]\n\t" 3356 "punpcklqdq $dst,$dst\t! replicate8S($con)" %} 3357 ins_encode %{ 3358 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3359 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3360 %} 3361 ins_pipe( pipe_slow ); 3362 %} 3363 3364 instruct Repl16S(vecY dst, rRegI src) %{ 3365 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3366 match(Set dst (ReplicateS src)); 3367 format %{ "movd $dst,$src\n\t" 3368 "pshuflw $dst,$dst,0x00\n\t" 3369 "punpcklqdq $dst,$dst\n\t" 3370 "vinserti128_high $dst,$dst\t! replicate16S" %} 3371 ins_encode %{ 3372 __ movdl($dst$$XMMRegister, $src$$Register); 3373 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3374 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3375 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3376 %} 3377 ins_pipe( pipe_slow ); 3378 %} 3379 3380 instruct Repl16S_mem(vecY dst, memory mem) %{ 3381 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3382 match(Set dst (ReplicateS (LoadS mem))); 3383 format %{ "pshuflw $dst,$mem,0x00\n\t" 3384 "punpcklqdq $dst,$dst\n\t" 3385 "vinserti128_high $dst,$dst\t! replicate16S" %} 3386 ins_encode %{ 3387 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3388 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3389 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3390 %} 3391 ins_pipe( pipe_slow ); 3392 %} 3393 3394 instruct Repl16S_imm(vecY dst, immI con) %{ 3395 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3396 match(Set dst (ReplicateS con)); 3397 format %{ "movq $dst,[$constantaddress]\n\t" 3398 "punpcklqdq $dst,$dst\n\t" 3399 "vinserti128_high $dst,$dst\t! 
replicate16S($con)" %} 3400 ins_encode %{ 3401 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3402 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3403 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3404 %} 3405 ins_pipe( pipe_slow ); 3406 %} 3407 3408 instruct Repl4I(vecX dst, rRegI src) %{ 3409 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3410 match(Set dst (ReplicateI src)); 3411 format %{ "movd $dst,$src\n\t" 3412 "pshufd $dst,$dst,0x00\t! replicate4I" %} 3413 ins_encode %{ 3414 __ movdl($dst$$XMMRegister, $src$$Register); 3415 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3416 %} 3417 ins_pipe( pipe_slow ); 3418 %} 3419 3420 instruct Repl4I_mem(vecX dst, memory mem) %{ 3421 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3422 match(Set dst (ReplicateI (LoadI mem))); 3423 format %{ "pshufd $dst,$mem,0x00\t! replicate4I" %} 3424 ins_encode %{ 3425 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3426 %} 3427 ins_pipe( pipe_slow ); 3428 %} 3429 3430 instruct Repl8I(vecY dst, rRegI src) %{ 3431 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3432 match(Set dst (ReplicateI src)); 3433 format %{ "movd $dst,$src\n\t" 3434 "pshufd $dst,$dst,0x00\n\t" 3435 "vinserti128_high $dst,$dst\t! replicate8I" %} 3436 ins_encode %{ 3437 __ movdl($dst$$XMMRegister, $src$$Register); 3438 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3439 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3440 %} 3441 ins_pipe( pipe_slow ); 3442 %} 3443 3444 instruct Repl8I_mem(vecY dst, memory mem) %{ 3445 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3446 match(Set dst (ReplicateI (LoadI mem))); 3447 format %{ "pshufd $dst,$mem,0x00\n\t" 3448 "vinserti128_high $dst,$dst\t! replicate8I" %} 3449 ins_encode %{ 3450 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3451 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3452 %} 3453 ins_pipe( pipe_slow ); 3454 %} 3455 3456 instruct Repl4I_imm(vecX dst, immI con) %{ 3457 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3458 match(Set dst (ReplicateI con)); 3459 format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t" 3460 "punpcklqdq $dst,$dst" %} 3461 ins_encode %{ 3462 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3463 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3464 %} 3465 ins_pipe( pipe_slow ); 3466 %} 3467 3468 instruct Repl8I_imm(vecY dst, immI con) %{ 3469 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3470 match(Set dst (ReplicateI con)); 3471 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 3472 "punpcklqdq $dst,$dst\n\t" 3473 "vinserti128_high $dst,$dst" %} 3474 ins_encode %{ 3475 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3476 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3477 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3478 %} 3479 ins_pipe( pipe_slow ); 3480 %} 3481 3482 // Long could be loaded into xmm register directly from memory. 3483 instruct Repl2L_mem(vecX dst, memory mem) %{ 3484 predicate(n->as_Vector()->length() == 2 && !VM_Version::supports_avx512vlbw()); 3485 match(Set dst (ReplicateL (LoadL mem))); 3486 format %{ "movq $dst,$mem\n\t" 3487 "punpcklqdq $dst,$dst\t! 
replicate2L" %} 3488 ins_encode %{ 3489 __ movq($dst$$XMMRegister, $mem$$Address); 3490 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3491 %} 3492 ins_pipe( pipe_slow ); 3493 %} 3494 3495 // Replicate long (8 byte) scalar to be vector 3496 #ifdef _LP64 3497 instruct Repl4L(vecY dst, rRegL src) %{ 3498 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3499 match(Set dst (ReplicateL src)); 3500 format %{ "movdq $dst,$src\n\t" 3501 "punpcklqdq $dst,$dst\n\t" 3502 "vinserti128_high $dst,$dst\t! replicate4L" %} 3503 ins_encode %{ 3504 __ movdq($dst$$XMMRegister, $src$$Register); 3505 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3506 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3507 %} 3508 ins_pipe( pipe_slow ); 3509 %} 3510 #else // _LP64 3511 instruct Repl4L(vecY dst, eRegL src, regD tmp) %{ 3512 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3513 match(Set dst (ReplicateL src)); 3514 effect(TEMP dst, USE src, TEMP tmp); 3515 format %{ "movdl $dst,$src.lo\n\t" 3516 "movdl $tmp,$src.hi\n\t" 3517 "punpckldq $dst,$tmp\n\t" 3518 "punpcklqdq $dst,$dst\n\t" 3519 "vinserti128_high $dst,$dst\t! replicate4L" %} 3520 ins_encode %{ 3521 __ movdl($dst$$XMMRegister, $src$$Register); 3522 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3523 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3524 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3525 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3526 %} 3527 ins_pipe( pipe_slow ); 3528 %} 3529 #endif // _LP64 3530 3531 instruct Repl4L_imm(vecY dst, immL con) %{ 3532 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3533 match(Set dst (ReplicateL con)); 3534 format %{ "movq $dst,[$constantaddress]\n\t" 3535 "punpcklqdq $dst,$dst\n\t" 3536 "vinserti128_high $dst,$dst\t! replicate4L($con)" %} 3537 ins_encode %{ 3538 __ movq($dst$$XMMRegister, $constantaddress($con)); 3539 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3540 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3541 %} 3542 ins_pipe( pipe_slow ); 3543 %} 3544 3545 instruct Repl4L_mem(vecY dst, memory mem) %{ 3546 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3547 match(Set dst (ReplicateL (LoadL mem))); 3548 format %{ "movq $dst,$mem\n\t" 3549 "punpcklqdq $dst,$dst\n\t" 3550 "vinserti128_high $dst,$dst\t! replicate4L" %} 3551 ins_encode %{ 3552 __ movq($dst$$XMMRegister, $mem$$Address); 3553 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3554 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3555 %} 3556 ins_pipe( pipe_slow ); 3557 %} 3558 3559 instruct Repl2F_mem(vecD dst, memory mem) %{ 3560 predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3561 match(Set dst (ReplicateF (LoadF mem))); 3562 format %{ "pshufd $dst,$mem,0x00\t! replicate2F" %} 3563 ins_encode %{ 3564 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3565 %} 3566 ins_pipe( pipe_slow ); 3567 %} 3568 3569 instruct Repl4F_mem(vecX dst, memory mem) %{ 3570 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3571 match(Set dst (ReplicateF (LoadF mem))); 3572 format %{ "pshufd $dst,$mem,0x00\t! 
replicate4F" %} 3573 ins_encode %{ 3574 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3575 %} 3576 ins_pipe( pipe_slow ); 3577 %} 3578 3579 instruct Repl8F(vecY dst, regF src) %{ 3580 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3581 match(Set dst (ReplicateF src)); 3582 format %{ "pshufd $dst,$src,0x00\n\t" 3583 "vinsertf128_high $dst,$dst\t! replicate8F" %} 3584 ins_encode %{ 3585 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3586 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3587 %} 3588 ins_pipe( pipe_slow ); 3589 %} 3590 3591 instruct Repl8F_mem(vecY dst, memory mem) %{ 3592 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3593 match(Set dst (ReplicateF (LoadF mem))); 3594 format %{ "pshufd $dst,$mem,0x00\n\t" 3595 "vinsertf128_high $dst,$dst\t! replicate8F" %} 3596 ins_encode %{ 3597 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3598 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3599 %} 3600 ins_pipe( pipe_slow ); 3601 %} 3602 3603 instruct Repl2F_zero(vecD dst, immF0 zero) %{ 3604 predicate(n->as_Vector()->length() == 2 && UseAVX < 3); 3605 match(Set dst (ReplicateF zero)); 3606 format %{ "xorps $dst,$dst\t! replicate2F zero" %} 3607 ins_encode %{ 3608 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 3609 %} 3610 ins_pipe( fpu_reg_reg ); 3611 %} 3612 3613 instruct Repl4F_zero(vecX dst, immF0 zero) %{ 3614 predicate(n->as_Vector()->length() == 4 && UseAVX < 3); 3615 match(Set dst (ReplicateF zero)); 3616 format %{ "xorps $dst,$dst\t! replicate4F zero" %} 3617 ins_encode %{ 3618 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 3619 %} 3620 ins_pipe( fpu_reg_reg ); 3621 %} 3622 3623 instruct Repl8F_zero(vecY dst, immF0 zero) %{ 3624 predicate(n->as_Vector()->length() == 8 && UseAVX < 3); 3625 match(Set dst (ReplicateF zero)); 3626 format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %} 3627 ins_encode %{ 3628 int vector_len = 1; 3629 __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3630 %} 3631 ins_pipe( fpu_reg_reg ); 3632 %} 3633 3634 instruct Repl2D_mem(vecX dst, memory mem) %{ 3635 predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3636 match(Set dst (ReplicateD (LoadD mem))); 3637 format %{ "pshufd $dst,$mem,0x44\t! replicate2D" %} 3638 ins_encode %{ 3639 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3640 %} 3641 ins_pipe( pipe_slow ); 3642 %} 3643 3644 instruct Repl4D(vecY dst, regD src) %{ 3645 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3646 match(Set dst (ReplicateD src)); 3647 format %{ "pshufd $dst,$src,0x44\n\t" 3648 "vinsertf128_high $dst,$dst\t! replicate4D" %} 3649 ins_encode %{ 3650 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3651 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3652 %} 3653 ins_pipe( pipe_slow ); 3654 %} 3655 3656 instruct Repl4D_mem(vecY dst, memory mem) %{ 3657 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3658 match(Set dst (ReplicateD (LoadD mem))); 3659 format %{ "pshufd $dst,$mem,0x44\n\t" 3660 "vinsertf128_high $dst,$dst\t! 
replicate4D" %} 3661 ins_encode %{ 3662 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3663 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3664 %} 3665 ins_pipe( pipe_slow ); 3666 %} 3667 3668 // Replicate double (8 byte) scalar zero to be vector 3669 instruct Repl2D_zero(vecX dst, immD0 zero) %{ 3670 predicate(n->as_Vector()->length() == 2 && UseAVX < 3); 3671 match(Set dst (ReplicateD zero)); 3672 format %{ "xorpd $dst,$dst\t! replicate2D zero" %} 3673 ins_encode %{ 3674 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister); 3675 %} 3676 ins_pipe( fpu_reg_reg ); 3677 %} 3678 3679 instruct Repl4D_zero(vecY dst, immD0 zero) %{ 3680 predicate(n->as_Vector()->length() == 4 && UseAVX < 3); 3681 match(Set dst (ReplicateD zero)); 3682 format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %} 3683 ins_encode %{ 3684 int vector_len = 1; 3685 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3686 %} 3687 ins_pipe( fpu_reg_reg ); 3688 %} 3689 3690 // ====================GENERIC REPLICATE========================================== 3691 3692 // Replicate byte scalar to be vector 3693 instruct Repl4B(vecS dst, rRegI src) %{ 3694 predicate(n->as_Vector()->length() == 4); 3695 match(Set dst (ReplicateB src)); 3696 format %{ "movd $dst,$src\n\t" 3697 "punpcklbw $dst,$dst\n\t" 3698 "pshuflw $dst,$dst,0x00\t! replicate4B" %} 3699 ins_encode %{ 3700 __ movdl($dst$$XMMRegister, $src$$Register); 3701 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3702 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3703 %} 3704 ins_pipe( pipe_slow ); 3705 %} 3706 3707 instruct Repl8B(vecD dst, rRegI src) %{ 3708 predicate(n->as_Vector()->length() == 8); 3709 match(Set dst (ReplicateB src)); 3710 format %{ "movd $dst,$src\n\t" 3711 "punpcklbw $dst,$dst\n\t" 3712 "pshuflw $dst,$dst,0x00\t! replicate8B" %} 3713 ins_encode %{ 3714 __ movdl($dst$$XMMRegister, $src$$Register); 3715 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3716 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3717 %} 3718 ins_pipe( pipe_slow ); 3719 %} 3720 3721 // Replicate byte scalar immediate to be vector by loading from const table. 3722 instruct Repl4B_imm(vecS dst, immI con) %{ 3723 predicate(n->as_Vector()->length() == 4); 3724 match(Set dst (ReplicateB con)); 3725 format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %} 3726 ins_encode %{ 3727 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1))); 3728 %} 3729 ins_pipe( pipe_slow ); 3730 %} 3731 3732 instruct Repl8B_imm(vecD dst, immI con) %{ 3733 predicate(n->as_Vector()->length() == 8); 3734 match(Set dst (ReplicateB con)); 3735 format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %} 3736 ins_encode %{ 3737 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3738 %} 3739 ins_pipe( pipe_slow ); 3740 %} 3741 3742 // Replicate byte scalar zero to be vector 3743 instruct Repl4B_zero(vecS dst, immI0 zero) %{ 3744 predicate(n->as_Vector()->length() == 4); 3745 match(Set dst (ReplicateB zero)); 3746 format %{ "pxor $dst,$dst\t! replicate4B zero" %} 3747 ins_encode %{ 3748 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3749 %} 3750 ins_pipe( fpu_reg_reg ); 3751 %} 3752 3753 instruct Repl8B_zero(vecD dst, immI0 zero) %{ 3754 predicate(n->as_Vector()->length() == 8); 3755 match(Set dst (ReplicateB zero)); 3756 format %{ "pxor $dst,$dst\t! 
replicate8B zero" %} 3757 ins_encode %{ 3758 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3759 %} 3760 ins_pipe( fpu_reg_reg ); 3761 %} 3762 3763 instruct Repl16B_zero(vecX dst, immI0 zero) %{ 3764 predicate(n->as_Vector()->length() == 16); 3765 match(Set dst (ReplicateB zero)); 3766 format %{ "pxor $dst,$dst\t! replicate16B zero" %} 3767 ins_encode %{ 3768 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3769 %} 3770 ins_pipe( fpu_reg_reg ); 3771 %} 3772 3773 instruct Repl32B_zero(vecY dst, immI0 zero) %{ 3774 predicate(n->as_Vector()->length() == 32); 3775 match(Set dst (ReplicateB zero)); 3776 format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %} 3777 ins_encode %{ 3778 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 3779 int vector_len = 1; 3780 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3781 %} 3782 ins_pipe( fpu_reg_reg ); 3783 %} 3784 3785 // Replicate char/short (2 byte) scalar to be vector 3786 instruct Repl2S(vecS dst, rRegI src) %{ 3787 predicate(n->as_Vector()->length() == 2); 3788 match(Set dst (ReplicateS src)); 3789 format %{ "movd $dst,$src\n\t" 3790 "pshuflw $dst,$dst,0x00\t! replicate2S" %} 3791 ins_encode %{ 3792 __ movdl($dst$$XMMRegister, $src$$Register); 3793 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3794 %} 3795 ins_pipe( fpu_reg_reg ); 3796 %} 3797 3798 // Replicate char/short (2 byte) scalar immediate to be vector by loading from const table. 3799 instruct Repl2S_imm(vecS dst, immI con) %{ 3800 predicate(n->as_Vector()->length() == 2); 3801 match(Set dst (ReplicateS con)); 3802 format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %} 3803 ins_encode %{ 3804 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2))); 3805 %} 3806 ins_pipe( fpu_reg_reg ); 3807 %} 3808 3809 instruct Repl4S_imm(vecD dst, immI con) %{ 3810 predicate(n->as_Vector()->length() == 4); 3811 match(Set dst (ReplicateS con)); 3812 format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %} 3813 ins_encode %{ 3814 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3815 %} 3816 ins_pipe( fpu_reg_reg ); 3817 %} 3818 3819 // Replicate char/short (2 byte) scalar zero to be vector 3820 instruct Repl2S_zero(vecS dst, immI0 zero) %{ 3821 predicate(n->as_Vector()->length() == 2); 3822 match(Set dst (ReplicateS zero)); 3823 format %{ "pxor $dst,$dst\t! replicate2S zero" %} 3824 ins_encode %{ 3825 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3826 %} 3827 ins_pipe( fpu_reg_reg ); 3828 %} 3829 3830 instruct Repl4S_zero(vecD dst, immI0 zero) %{ 3831 predicate(n->as_Vector()->length() == 4); 3832 match(Set dst (ReplicateS zero)); 3833 format %{ "pxor $dst,$dst\t! replicate4S zero" %} 3834 ins_encode %{ 3835 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3836 %} 3837 ins_pipe( fpu_reg_reg ); 3838 %} 3839 3840 instruct Repl8S_zero(vecX dst, immI0 zero) %{ 3841 predicate(n->as_Vector()->length() == 8); 3842 match(Set dst (ReplicateS zero)); 3843 format %{ "pxor $dst,$dst\t! replicate8S zero" %} 3844 ins_encode %{ 3845 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3846 %} 3847 ins_pipe( fpu_reg_reg ); 3848 %} 3849 3850 instruct Repl16S_zero(vecY dst, immI0 zero) %{ 3851 predicate(n->as_Vector()->length() == 16); 3852 match(Set dst (ReplicateS zero)); 3853 format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %} 3854 ins_encode %{ 3855 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar to be vector
instruct Repl2I(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Integer could be loaded into xmm register directly from memory.
instruct Repl2I_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
instruct Repl2I_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar zero to be vector
instruct Repl2I_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate2I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate4I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8I_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate long (8 byte) scalar to be vector
#ifdef _LP64
instruct Repl2L(vecX dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64
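// Note on the _imm replicate forms: replicate4_imm/replicate8_imm pre-expand
// the low 1/2/4 bytes of the immediate across a 4-/8-byte constant-table
// entry (e.g. byte 0x41 becomes 0x4141414141414141), which is then loaded
// with movdl/movq and widened in-register as needed. A rough C sketch of the
// 8-byte helper's effect (illustrative only, not the actual implementation):
//
//   static uint64_t replicate8_imm_sketch(uint32_t con, int width) {
//     // width is the element size in bytes: 1, 2 or 4
//     uint64_t pattern = con & ((UINT64_C(1) << (width * 8)) - 1); // keep low bytes
//     for (int bytes = width; bytes < 8; bytes *= 2)
//       pattern |= pattern << (bytes * 8);                         // double the pattern
//     return pattern;
//   }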
replicate2L"%} 3954 ins_encode %{ 3955 __ movdl($dst$$XMMRegister, $src$$Register); 3956 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3957 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3958 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3959 %} 3960 ins_pipe( pipe_slow ); 3961 %} 3962 #endif // _LP64 3963 3964 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. 3965 instruct Repl2L_imm(vecX dst, immL con) %{ 3966 predicate(n->as_Vector()->length() == 2); 3967 match(Set dst (ReplicateL con)); 3968 format %{ "movq $dst,[$constantaddress]\n\t" 3969 "punpcklqdq $dst,$dst\t! replicate2L($con)" %} 3970 ins_encode %{ 3971 __ movq($dst$$XMMRegister, $constantaddress($con)); 3972 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3973 %} 3974 ins_pipe( pipe_slow ); 3975 %} 3976 3977 // Replicate long (8 byte) scalar zero to be vector 3978 instruct Repl2L_zero(vecX dst, immL0 zero) %{ 3979 predicate(n->as_Vector()->length() == 2); 3980 match(Set dst (ReplicateL zero)); 3981 format %{ "pxor $dst,$dst\t! replicate2L zero" %} 3982 ins_encode %{ 3983 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3984 %} 3985 ins_pipe( fpu_reg_reg ); 3986 %} 3987 3988 instruct Repl4L_zero(vecY dst, immL0 zero) %{ 3989 predicate(n->as_Vector()->length() == 4); 3990 match(Set dst (ReplicateL zero)); 3991 format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %} 3992 ins_encode %{ 3993 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 3994 int vector_len = 1; 3995 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3996 %} 3997 ins_pipe( fpu_reg_reg ); 3998 %} 3999 4000 // Replicate float (4 byte) scalar to be vector 4001 instruct Repl2F(vecD dst, regF src) %{ 4002 predicate(n->as_Vector()->length() == 2); 4003 match(Set dst (ReplicateF src)); 4004 format %{ "pshufd $dst,$dst,0x00\t! replicate2F" %} 4005 ins_encode %{ 4006 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 4007 %} 4008 ins_pipe( fpu_reg_reg ); 4009 %} 4010 4011 instruct Repl4F(vecX dst, regF src) %{ 4012 predicate(n->as_Vector()->length() == 4); 4013 match(Set dst (ReplicateF src)); 4014 format %{ "pshufd $dst,$dst,0x00\t! replicate4F" %} 4015 ins_encode %{ 4016 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 4017 %} 4018 ins_pipe( pipe_slow ); 4019 %} 4020 4021 // Replicate double (8 bytes) scalar to be vector 4022 instruct Repl2D(vecX dst, regD src) %{ 4023 predicate(n->as_Vector()->length() == 2); 4024 match(Set dst (ReplicateD src)); 4025 format %{ "pshufd $dst,$src,0x44\t! replicate2D" %} 4026 ins_encode %{ 4027 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 4028 %} 4029 ins_pipe( pipe_slow ); 4030 %} 4031 4032 // ====================EVEX REPLICATE============================================= 4033 4034 instruct Repl4B_mem_evex(vecS dst, memory mem) %{ 4035 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw()); 4036 match(Set dst (ReplicateB (LoadB mem))); 4037 format %{ "vpbroadcastb $dst,$mem\t! replicate4B" %} 4038 ins_encode %{ 4039 int vector_len = 0; 4040 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4041 %} 4042 ins_pipe( pipe_slow ); 4043 %} 4044 4045 instruct Repl8B_mem_evex(vecD dst, memory mem) %{ 4046 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 4047 match(Set dst (ReplicateB (LoadB mem))); 4048 format %{ "vpbroadcastb $dst,$mem\t! 
replicate8B" %} 4049 ins_encode %{ 4050 int vector_len = 0; 4051 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4052 %} 4053 ins_pipe( pipe_slow ); 4054 %} 4055 4056 instruct Repl16B_evex(vecX dst, rRegI src) %{ 4057 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4058 match(Set dst (ReplicateB src)); 4059 format %{ "vpbroadcastb $dst,$src\t! replicate16B" %} 4060 ins_encode %{ 4061 int vector_len = 0; 4062 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 4063 %} 4064 ins_pipe( pipe_slow ); 4065 %} 4066 4067 instruct Repl16B_mem_evex(vecX dst, memory mem) %{ 4068 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4069 match(Set dst (ReplicateB (LoadB mem))); 4070 format %{ "vpbroadcastb $dst,$mem\t! replicate16B" %} 4071 ins_encode %{ 4072 int vector_len = 0; 4073 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4074 %} 4075 ins_pipe( pipe_slow ); 4076 %} 4077 4078 instruct Repl32B_evex(vecY dst, rRegI src) %{ 4079 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); 4080 match(Set dst (ReplicateB src)); 4081 format %{ "vpbroadcastb $dst,$src\t! replicate32B" %} 4082 ins_encode %{ 4083 int vector_len = 1; 4084 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 4085 %} 4086 ins_pipe( pipe_slow ); 4087 %} 4088 4089 instruct Repl32B_mem_evex(vecY dst, memory mem) %{ 4090 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); 4091 match(Set dst (ReplicateB (LoadB mem))); 4092 format %{ "vpbroadcastb $dst,$mem\t! replicate32B" %} 4093 ins_encode %{ 4094 int vector_len = 1; 4095 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4096 %} 4097 ins_pipe( pipe_slow ); 4098 %} 4099 4100 instruct Repl64B_evex(vecZ dst, rRegI src) %{ 4101 predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw()); 4102 match(Set dst (ReplicateB src)); 4103 format %{ "vpbroadcastb $dst,$src\t! upper replicate64B" %} 4104 ins_encode %{ 4105 int vector_len = 2; 4106 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 4107 %} 4108 ins_pipe( pipe_slow ); 4109 %} 4110 4111 instruct Repl64B_mem_evex(vecZ dst, memory mem) %{ 4112 predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw()); 4113 match(Set dst (ReplicateB (LoadB mem))); 4114 format %{ "vpbroadcastb $dst,$mem\t! replicate64B" %} 4115 ins_encode %{ 4116 int vector_len = 2; 4117 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4118 %} 4119 ins_pipe( pipe_slow ); 4120 %} 4121 4122 instruct Repl16B_imm_evex(vecX dst, immI con) %{ 4123 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4124 match(Set dst (ReplicateB con)); 4125 format %{ "movq $dst,[$constantaddress]\n\t" 4126 "vpbroadcastb $dst,$dst\t! replicate16B" %} 4127 ins_encode %{ 4128 int vector_len = 0; 4129 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 4130 __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4131 %} 4132 ins_pipe( pipe_slow ); 4133 %} 4134 4135 instruct Repl32B_imm_evex(vecY dst, immI con) %{ 4136 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); 4137 match(Set dst (ReplicateB con)); 4138 format %{ "movq $dst,[$constantaddress]\n\t" 4139 "vpbroadcastb $dst,$dst\t! 
replicate32B" %} 4140 ins_encode %{ 4141 int vector_len = 1; 4142 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 4143 __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4144 %} 4145 ins_pipe( pipe_slow ); 4146 %} 4147 4148 instruct Repl64B_imm_evex(vecZ dst, immI con) %{ 4149 predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw()); 4150 match(Set dst (ReplicateB con)); 4151 format %{ "movq $dst,[$constantaddress]\n\t" 4152 "vpbroadcastb $dst,$dst\t! upper replicate64B" %} 4153 ins_encode %{ 4154 int vector_len = 2; 4155 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 4156 __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4157 %} 4158 ins_pipe( pipe_slow ); 4159 %} 4160 4161 instruct Repl64B_zero_evex(vecZ dst, immI0 zero) %{ 4162 predicate(n->as_Vector()->length() == 64 && UseAVX > 2); 4163 match(Set dst (ReplicateB zero)); 4164 format %{ "vpxor $dst k0,$dst,$dst\t! replicate64B zero" %} 4165 ins_encode %{ 4166 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 4167 int vector_len = 2; 4168 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4169 %} 4170 ins_pipe( fpu_reg_reg ); 4171 %} 4172 4173 instruct Repl4S_evex(vecD dst, rRegI src) %{ 4174 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw()); 4175 match(Set dst (ReplicateS src)); 4176 format %{ "vpbroadcastw $dst,$src\t! replicate4S" %} 4177 ins_encode %{ 4178 int vector_len = 0; 4179 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4180 %} 4181 ins_pipe( pipe_slow ); 4182 %} 4183 4184 instruct Repl4S_mem_evex(vecD dst, memory mem) %{ 4185 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw()); 4186 match(Set dst (ReplicateS (LoadS mem))); 4187 format %{ "vpbroadcastw $dst,$mem\t! replicate4S" %} 4188 ins_encode %{ 4189 int vector_len = 0; 4190 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4191 %} 4192 ins_pipe( pipe_slow ); 4193 %} 4194 4195 instruct Repl8S_evex(vecX dst, rRegI src) %{ 4196 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 4197 match(Set dst (ReplicateS src)); 4198 format %{ "vpbroadcastw $dst,$src\t! replicate8S" %} 4199 ins_encode %{ 4200 int vector_len = 0; 4201 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4202 %} 4203 ins_pipe( pipe_slow ); 4204 %} 4205 4206 instruct Repl8S_mem_evex(vecX dst, memory mem) %{ 4207 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 4208 match(Set dst (ReplicateS (LoadS mem))); 4209 format %{ "vpbroadcastw $dst,$mem\t! replicate8S" %} 4210 ins_encode %{ 4211 int vector_len = 0; 4212 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4213 %} 4214 ins_pipe( pipe_slow ); 4215 %} 4216 4217 instruct Repl16S_evex(vecY dst, rRegI src) %{ 4218 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4219 match(Set dst (ReplicateS src)); 4220 format %{ "vpbroadcastw $dst,$src\t! replicate16S" %} 4221 ins_encode %{ 4222 int vector_len = 1; 4223 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4224 %} 4225 ins_pipe( pipe_slow ); 4226 %} 4227 4228 instruct Repl16S_mem_evex(vecY dst, memory mem) %{ 4229 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4230 match(Set dst (ReplicateS (LoadS mem))); 4231 format %{ "vpbroadcastw $dst,$mem\t! 
replicate16S" %} 4232 ins_encode %{ 4233 int vector_len = 1; 4234 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4235 %} 4236 ins_pipe( pipe_slow ); 4237 %} 4238 4239 instruct Repl32S_evex(vecZ dst, rRegI src) %{ 4240 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw()); 4241 match(Set dst (ReplicateS src)); 4242 format %{ "vpbroadcastw $dst,$src\t! replicate32S" %} 4243 ins_encode %{ 4244 int vector_len = 2; 4245 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4246 %} 4247 ins_pipe( pipe_slow ); 4248 %} 4249 4250 instruct Repl32S_mem_evex(vecZ dst, memory mem) %{ 4251 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw()); 4252 match(Set dst (ReplicateS (LoadS mem))); 4253 format %{ "vpbroadcastw $dst,$mem\t! replicate32S" %} 4254 ins_encode %{ 4255 int vector_len = 2; 4256 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4257 %} 4258 ins_pipe( pipe_slow ); 4259 %} 4260 4261 instruct Repl8S_imm_evex(vecX dst, immI con) %{ 4262 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 4263 match(Set dst (ReplicateS con)); 4264 format %{ "movq $dst,[$constantaddress]\n\t" 4265 "vpbroadcastw $dst,$dst\t! replicate8S" %} 4266 ins_encode %{ 4267 int vector_len = 0; 4268 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4269 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4270 %} 4271 ins_pipe( pipe_slow ); 4272 %} 4273 4274 instruct Repl16S_imm_evex(vecY dst, immI con) %{ 4275 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4276 match(Set dst (ReplicateS con)); 4277 format %{ "movq $dst,[$constantaddress]\n\t" 4278 "vpbroadcastw $dst,$dst\t! replicate16S" %} 4279 ins_encode %{ 4280 int vector_len = 1; 4281 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4282 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4283 %} 4284 ins_pipe( pipe_slow ); 4285 %} 4286 4287 instruct Repl32S_imm_evex(vecZ dst, immI con) %{ 4288 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw()); 4289 match(Set dst (ReplicateS con)); 4290 format %{ "movq $dst,[$constantaddress]\n\t" 4291 "vpbroadcastw $dst,$dst\t! replicate32S" %} 4292 ins_encode %{ 4293 int vector_len = 2; 4294 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4295 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4296 %} 4297 ins_pipe( pipe_slow ); 4298 %} 4299 4300 instruct Repl32S_zero_evex(vecZ dst, immI0 zero) %{ 4301 predicate(n->as_Vector()->length() == 32 && UseAVX > 2); 4302 match(Set dst (ReplicateS zero)); 4303 format %{ "vpxor $dst k0,$dst,$dst\t! replicate32S zero" %} 4304 ins_encode %{ 4305 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 4306 int vector_len = 2; 4307 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4308 %} 4309 ins_pipe( fpu_reg_reg ); 4310 %} 4311 4312 instruct Repl4I_evex(vecX dst, rRegI src) %{ 4313 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4314 match(Set dst (ReplicateI src)); 4315 format %{ "vpbroadcastd $dst,$src\t! 
replicate4I" %} 4316 ins_encode %{ 4317 int vector_len = 0; 4318 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4319 %} 4320 ins_pipe( pipe_slow ); 4321 %} 4322 4323 instruct Repl4I_mem_evex(vecX dst, memory mem) %{ 4324 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4325 match(Set dst (ReplicateI (LoadI mem))); 4326 format %{ "vpbroadcastd $dst,$mem\t! replicate4I" %} 4327 ins_encode %{ 4328 int vector_len = 0; 4329 __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4330 %} 4331 ins_pipe( pipe_slow ); 4332 %} 4333 4334 instruct Repl8I_evex(vecY dst, rRegI src) %{ 4335 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4336 match(Set dst (ReplicateI src)); 4337 format %{ "vpbroadcastd $dst,$src\t! replicate8I" %} 4338 ins_encode %{ 4339 int vector_len = 1; 4340 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4341 %} 4342 ins_pipe( pipe_slow ); 4343 %} 4344 4345 instruct Repl8I_mem_evex(vecY dst, memory mem) %{ 4346 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4347 match(Set dst (ReplicateI (LoadI mem))); 4348 format %{ "vpbroadcastd $dst,$mem\t! replicate8I" %} 4349 ins_encode %{ 4350 int vector_len = 1; 4351 __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4352 %} 4353 ins_pipe( pipe_slow ); 4354 %} 4355 4356 instruct Repl16I_evex(vecZ dst, rRegI src) %{ 4357 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4358 match(Set dst (ReplicateI src)); 4359 format %{ "vpbroadcastd $dst,$src\t! replicate16I" %} 4360 ins_encode %{ 4361 int vector_len = 2; 4362 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4363 %} 4364 ins_pipe( pipe_slow ); 4365 %} 4366 4367 instruct Repl16I_mem_evex(vecZ dst, memory mem) %{ 4368 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4369 match(Set dst (ReplicateI (LoadI mem))); 4370 format %{ "vpbroadcastd $dst,$mem\t! replicate16I" %} 4371 ins_encode %{ 4372 int vector_len = 2; 4373 __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4374 %} 4375 ins_pipe( pipe_slow ); 4376 %} 4377 4378 instruct Repl4I_imm_evex(vecX dst, immI con) %{ 4379 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4380 match(Set dst (ReplicateI con)); 4381 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 4382 "vpbroadcastd $dst,$dst\t! replicate4I" %} 4383 ins_encode %{ 4384 int vector_len = 0; 4385 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4386 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4387 %} 4388 ins_pipe( pipe_slow ); 4389 %} 4390 4391 instruct Repl8I_imm_evex(vecY dst, immI con) %{ 4392 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4393 match(Set dst (ReplicateI con)); 4394 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 4395 "vpbroadcastd $dst,$dst\t! replicate8I" %} 4396 ins_encode %{ 4397 int vector_len = 1; 4398 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4399 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4400 %} 4401 ins_pipe( pipe_slow ); 4402 %} 4403 4404 instruct Repl16I_imm_evex(vecZ dst, immI con) %{ 4405 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4406 match(Set dst (ReplicateI con)); 4407 format %{ "movq $dst,[$constantaddress]\t! replicate16I($con)\n\t" 4408 "vpbroadcastd $dst,$dst\t! 
replicate16I" %} 4409 ins_encode %{ 4410 int vector_len = 2; 4411 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4412 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4413 %} 4414 ins_pipe( pipe_slow ); 4415 %} 4416 4417 instruct Repl16I_zero_evex(vecZ dst, immI0 zero) %{ 4418 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4419 match(Set dst (ReplicateI zero)); 4420 format %{ "vpxor $dst k0,$dst,$dst\t! replicate16I zero" %} 4421 ins_encode %{ 4422 // Use vxorpd since AVX does not have vpxor for 512-bit (AVX2 will have it). 4423 int vector_len = 2; 4424 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4425 %} 4426 ins_pipe( fpu_reg_reg ); 4427 %} 4428 4429 // Replicate long (8 byte) scalar to be vector 4430 #ifdef _LP64 4431 instruct Repl4L_evex(vecY dst, rRegL src) %{ 4432 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4433 match(Set dst (ReplicateL src)); 4434 format %{ "vpbroadcastq $dst,$src\t! replicate4L" %} 4435 ins_encode %{ 4436 int vector_len = 1; 4437 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); 4438 %} 4439 ins_pipe( pipe_slow ); 4440 %} 4441 4442 instruct Repl8L_evex(vecZ dst, rRegL src) %{ 4443 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4444 match(Set dst (ReplicateL src)); 4445 format %{ "vpbroadcastq $dst,$src\t! replicate8L" %} 4446 ins_encode %{ 4447 int vector_len = 2; 4448 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); 4449 %} 4450 ins_pipe( pipe_slow ); 4451 %} 4452 #else // _LP64 4453 instruct Repl4L_evex(vecY dst, eRegL src, regD tmp) %{ 4454 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4455 match(Set dst (ReplicateL src)); 4456 effect(TEMP dst, USE src, TEMP tmp); 4457 format %{ "movdl $dst,$src.lo\n\t" 4458 "movdl $tmp,$src.hi\n\t" 4459 "punpckldq $dst,$tmp\n\t" 4460 "vpbroadcastq $dst,$dst\t! replicate4L" %} 4461 ins_encode %{ 4462 int vector_len = 1; 4463 __ movdl($dst$$XMMRegister, $src$$Register); 4464 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4465 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4466 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4467 %} 4468 ins_pipe( pipe_slow ); 4469 %} 4470 4471 instruct Repl8L_evex(vecZ dst, eRegL src, regD tmp) %{ 4472 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4473 match(Set dst (ReplicateL src)); 4474 effect(TEMP dst, USE src, TEMP tmp); 4475 format %{ "movdl $dst,$src.lo\n\t" 4476 "movdl $tmp,$src.hi\n\t" 4477 "punpckldq $dst,$tmp\n\t" 4478 "vpbroadcastq $dst,$dst\t! replicate8L" %} 4479 ins_encode %{ 4480 int vector_len = 2; 4481 __ movdl($dst$$XMMRegister, $src$$Register); 4482 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4483 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4484 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4485 %} 4486 ins_pipe( pipe_slow ); 4487 %} 4488 #endif // _LP64 4489 4490 instruct Repl4L_imm_evex(vecY dst, immL con) %{ 4491 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4492 match(Set dst (ReplicateL con)); 4493 format %{ "movq $dst,[$constantaddress]\n\t" 4494 "vpbroadcastq $dst,$dst\t! 
replicate4L" %} 4495 ins_encode %{ 4496 int vector_len = 1; 4497 __ movq($dst$$XMMRegister, $constantaddress($con)); 4498 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4499 %} 4500 ins_pipe( pipe_slow ); 4501 %} 4502 4503 instruct Repl8L_imm_evex(vecZ dst, immL con) %{ 4504 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4505 match(Set dst (ReplicateL con)); 4506 format %{ "movq $dst,[$constantaddress]\n\t" 4507 "vpbroadcastq $dst,$dst\t! replicate8L" %} 4508 ins_encode %{ 4509 int vector_len = 2; 4510 __ movq($dst$$XMMRegister, $constantaddress($con)); 4511 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4512 %} 4513 ins_pipe( pipe_slow ); 4514 %} 4515 4516 instruct Repl2L_mem_evex(vecX dst, memory mem) %{ 4517 predicate(n->as_Vector()->length() == 2 && VM_Version::supports_avx512vl()); 4518 match(Set dst (ReplicateL (LoadL mem))); 4519 format %{ "vpbroadcastd $dst,$mem\t! replicate2L" %} 4520 ins_encode %{ 4521 int vector_len = 0; 4522 __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4523 %} 4524 ins_pipe( pipe_slow ); 4525 %} 4526 4527 instruct Repl4L_mem_evex(vecY dst, memory mem) %{ 4528 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4529 match(Set dst (ReplicateL (LoadL mem))); 4530 format %{ "vpbroadcastd $dst,$mem\t! replicate4L" %} 4531 ins_encode %{ 4532 int vector_len = 1; 4533 __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4534 %} 4535 ins_pipe( pipe_slow ); 4536 %} 4537 4538 instruct Repl8L_mem_evex(vecZ dst, memory mem) %{ 4539 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4540 match(Set dst (ReplicateL (LoadL mem))); 4541 format %{ "vpbroadcastd $dst,$mem\t! replicate8L" %} 4542 ins_encode %{ 4543 int vector_len = 2; 4544 __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4545 %} 4546 ins_pipe( pipe_slow ); 4547 %} 4548 4549 instruct Repl8L_zero_evex(vecZ dst, immL0 zero) %{ 4550 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4551 match(Set dst (ReplicateL zero)); 4552 format %{ "vpxor $dst k0,$dst,$dst\t! replicate8L zero" %} 4553 ins_encode %{ 4554 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 4555 int vector_len = 2; 4556 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4557 %} 4558 ins_pipe( fpu_reg_reg ); 4559 %} 4560 4561 instruct Repl8F_evex(vecY dst, regF src) %{ 4562 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4563 match(Set dst (ReplicateF src)); 4564 format %{ "vbroadcastss $dst,$src\t! replicate8F" %} 4565 ins_encode %{ 4566 int vector_len = 1; 4567 __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4568 %} 4569 ins_pipe( pipe_slow ); 4570 %} 4571 4572 instruct Repl8F_mem_evex(vecY dst, memory mem) %{ 4573 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4574 match(Set dst (ReplicateF (LoadF mem))); 4575 format %{ "vbroadcastss $dst,$mem\t! replicate8F" %} 4576 ins_encode %{ 4577 int vector_len = 1; 4578 __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); 4579 %} 4580 ins_pipe( pipe_slow ); 4581 %} 4582 4583 instruct Repl16F_evex(vecZ dst, regF src) %{ 4584 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4585 match(Set dst (ReplicateF src)); 4586 format %{ "vbroadcastss $dst,$src\t! 
replicate16F" %} 4587 ins_encode %{ 4588 int vector_len = 2; 4589 __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4590 %} 4591 ins_pipe( pipe_slow ); 4592 %} 4593 4594 instruct Repl16F_mem_evex(vecZ dst, memory mem) %{ 4595 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4596 match(Set dst (ReplicateF (LoadF mem))); 4597 format %{ "vbroadcastss $dst,$mem\t! replicate16F" %} 4598 ins_encode %{ 4599 int vector_len = 2; 4600 __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); 4601 %} 4602 ins_pipe( pipe_slow ); 4603 %} 4604 4605 instruct Repl2F_zero_evex(vecD dst, immF0 zero) %{ 4606 predicate(n->as_Vector()->length() == 2 && UseAVX > 2); 4607 match(Set dst (ReplicateF zero)); 4608 format %{ "vpxor $dst k0,$dst,$dst\t! replicate2F zero" %} 4609 ins_encode %{ 4610 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4611 int vector_len = 2; 4612 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4613 %} 4614 ins_pipe( fpu_reg_reg ); 4615 %} 4616 4617 instruct Repl4F_zero_evex(vecX dst, immF0 zero) %{ 4618 predicate(n->as_Vector()->length() == 4 && UseAVX > 2); 4619 match(Set dst (ReplicateF zero)); 4620 format %{ "vpxor $dst k0,$dst,$dst\t! replicate4F zero" %} 4621 ins_encode %{ 4622 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4623 int vector_len = 2; 4624 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4625 %} 4626 ins_pipe( fpu_reg_reg ); 4627 %} 4628 4629 instruct Repl8F_zero_evex(vecY dst, immF0 zero) %{ 4630 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4631 match(Set dst (ReplicateF zero)); 4632 format %{ "vpxor $dst k0,$dst,$dst\t! replicate8F zero" %} 4633 ins_encode %{ 4634 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4635 int vector_len = 2; 4636 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4637 %} 4638 ins_pipe( fpu_reg_reg ); 4639 %} 4640 4641 instruct Repl16F_zero_evex(vecZ dst, immF0 zero) %{ 4642 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4643 match(Set dst (ReplicateF zero)); 4644 format %{ "vpxor $dst k0,$dst,$dst\t! replicate16F zero" %} 4645 ins_encode %{ 4646 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4647 int vector_len = 2; 4648 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4649 %} 4650 ins_pipe( fpu_reg_reg ); 4651 %} 4652 4653 instruct Repl4D_evex(vecY dst, regD src) %{ 4654 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4655 match(Set dst (ReplicateD src)); 4656 format %{ "vbroadcastsd $dst,$src\t! replicate4D" %} 4657 ins_encode %{ 4658 int vector_len = 1; 4659 __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4660 %} 4661 ins_pipe( pipe_slow ); 4662 %} 4663 4664 instruct Repl4D_mem_evex(vecY dst, memory mem) %{ 4665 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4666 match(Set dst (ReplicateD (LoadD mem))); 4667 format %{ "vbroadcastsd $dst,$mem\t! 
replicate4D" %} 4668 ins_encode %{ 4669 int vector_len = 1; 4670 __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); 4671 %} 4672 ins_pipe( pipe_slow ); 4673 %} 4674 4675 instruct Repl8D_evex(vecZ dst, regD src) %{ 4676 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4677 match(Set dst (ReplicateD src)); 4678 format %{ "vbroadcastsd $dst,$src\t! replicate8D" %} 4679 ins_encode %{ 4680 int vector_len = 2; 4681 __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4682 %} 4683 ins_pipe( pipe_slow ); 4684 %} 4685 4686 instruct Repl8D_mem_evex(vecZ dst, memory mem) %{ 4687 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4688 match(Set dst (ReplicateD (LoadD mem))); 4689 format %{ "vbroadcastsd $dst,$mem\t! replicate8D" %} 4690 ins_encode %{ 4691 int vector_len = 2; 4692 __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); 4693 %} 4694 ins_pipe( pipe_slow ); 4695 %} 4696 4697 instruct Repl2D_zero_evex(vecX dst, immD0 zero) %{ 4698 predicate(n->as_Vector()->length() == 2 && UseAVX > 2); 4699 match(Set dst (ReplicateD zero)); 4700 format %{ "vpxor $dst k0,$dst,$dst\t! replicate2D zero" %} 4701 ins_encode %{ 4702 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4703 int vector_len = 2; 4704 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4705 %} 4706 ins_pipe( fpu_reg_reg ); 4707 %} 4708 4709 instruct Repl4D_zero_evex(vecY dst, immD0 zero) %{ 4710 predicate(n->as_Vector()->length() == 4 && UseAVX > 2); 4711 match(Set dst (ReplicateD zero)); 4712 format %{ "vpxor $dst k0,$dst,$dst\t! replicate4D zero" %} 4713 ins_encode %{ 4714 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4715 int vector_len = 2; 4716 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4717 %} 4718 ins_pipe( fpu_reg_reg ); 4719 %} 4720 4721 instruct Repl8D_zero_evex(vecZ dst, immD0 zero) %{ 4722 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4723 match(Set dst (ReplicateD zero)); 4724 format %{ "vpxor $dst k0,$dst,$dst,vect512\t! replicate8D zero" %} 4725 ins_encode %{ 4726 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4727 int vector_len = 2; 4728 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4729 %} 4730 ins_pipe( fpu_reg_reg ); 4731 %} 4732 4733 // ====================REDUCTION ARITHMETIC======================================= 4734 4735 instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4736 predicate(UseSSE > 2 && UseAVX == 0); 4737 match(Set dst (AddReductionVI src1 src2)); 4738 effect(TEMP tmp2, TEMP tmp); 4739 format %{ "movdqu $tmp2,$src2\n\t" 4740 "phaddd $tmp2,$tmp2\n\t" 4741 "movd $tmp,$src1\n\t" 4742 "paddd $tmp,$tmp2\n\t" 4743 "movd $dst,$tmp\t! 
add reduction2I" %} 4744 ins_encode %{ 4745 __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister); 4746 __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); 4747 __ movdl($tmp$$XMMRegister, $src1$$Register); 4748 __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister); 4749 __ movdl($dst$$Register, $tmp$$XMMRegister); 4750 %} 4751 ins_pipe( pipe_slow ); 4752 %} 4753 4754 instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4755 predicate(VM_Version::supports_avxonly()); 4756 match(Set dst (AddReductionVI src1 src2)); 4757 effect(TEMP tmp, TEMP tmp2); 4758 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4759 "movd $tmp2,$src1\n\t" 4760 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4761 "movd $dst,$tmp2\t! add reduction2I" %} 4762 ins_encode %{ 4763 int vector_len = 0; 4764 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4765 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4766 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 4767 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4768 %} 4769 ins_pipe( pipe_slow ); 4770 %} 4771 4772 instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4773 predicate(UseAVX > 2); 4774 match(Set dst (AddReductionVI src1 src2)); 4775 effect(TEMP tmp, TEMP tmp2); 4776 format %{ "pshufd $tmp2,$src2,0x1\n\t" 4777 "vpaddd $tmp,$src2,$tmp2\n\t" 4778 "movd $tmp2,$src1\n\t" 4779 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4780 "movd $dst,$tmp2\t! add reduction2I" %} 4781 ins_encode %{ 4782 int vector_len = 0; 4783 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 4784 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4785 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4786 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4787 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4788 %} 4789 ins_pipe( pipe_slow ); 4790 %} 4791 4792 instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4793 predicate(UseSSE > 2 && UseAVX == 0); 4794 match(Set dst (AddReductionVI src1 src2)); 4795 effect(TEMP tmp, TEMP tmp2); 4796 format %{ "movdqu $tmp,$src2\n\t" 4797 "phaddd $tmp,$tmp\n\t" 4798 "phaddd $tmp,$tmp\n\t" 4799 "movd $tmp2,$src1\n\t" 4800 "paddd $tmp2,$tmp\n\t" 4801 "movd $dst,$tmp2\t! add reduction4I" %} 4802 ins_encode %{ 4803 __ movdqu($tmp$$XMMRegister, $src2$$XMMRegister); 4804 __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); 4805 __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); 4806 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4807 __ paddd($tmp2$$XMMRegister, $tmp$$XMMRegister); 4808 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4809 %} 4810 ins_pipe( pipe_slow ); 4811 %} 4812 4813 instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4814 predicate(VM_Version::supports_avxonly()); 4815 match(Set dst (AddReductionVI src1 src2)); 4816 effect(TEMP tmp, TEMP tmp2); 4817 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4818 "vphaddd $tmp,$tmp,$tmp\n\t" 4819 "movd $tmp2,$src1\n\t" 4820 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4821 "movd $dst,$tmp2\t! 
add reduction4I" %} 4822 ins_encode %{ 4823 int vector_len = 0; 4824 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4825 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 4826 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4827 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 4828 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4829 %} 4830 ins_pipe( pipe_slow ); 4831 %} 4832 4833 instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4834 predicate(UseAVX > 2); 4835 match(Set dst (AddReductionVI src1 src2)); 4836 effect(TEMP tmp, TEMP tmp2); 4837 format %{ "pshufd $tmp2,$src2,0xE\n\t" 4838 "vpaddd $tmp,$src2,$tmp2\n\t" 4839 "pshufd $tmp2,$tmp,0x1\n\t" 4840 "vpaddd $tmp,$tmp,$tmp2\n\t" 4841 "movd $tmp2,$src1\n\t" 4842 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4843 "movd $dst,$tmp2\t! add reduction4I" %} 4844 ins_encode %{ 4845 int vector_len = 0; 4846 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 4847 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4848 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4849 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4850 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4851 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4852 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4853 %} 4854 ins_pipe( pipe_slow ); 4855 %} 4856 4857 instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 4858 predicate(VM_Version::supports_avxonly()); 4859 match(Set dst (AddReductionVI src1 src2)); 4860 effect(TEMP tmp, TEMP tmp2); 4861 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4862 "vphaddd $tmp,$tmp,$tmp2\n\t" 4863 "vextracti128_high $tmp2,$tmp\n\t" 4864 "vpaddd $tmp,$tmp,$tmp2\n\t" 4865 "movd $tmp2,$src1\n\t" 4866 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4867 "movd $dst,$tmp2\t! add reduction8I" %} 4868 ins_encode %{ 4869 int vector_len = 1; 4870 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4871 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4872 __ vextracti128_high($tmp2$$XMMRegister, $tmp$$XMMRegister); 4873 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4874 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4875 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4876 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4877 %} 4878 ins_pipe( pipe_slow ); 4879 %} 4880 4881 instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 4882 predicate(UseAVX > 2); 4883 match(Set dst (AddReductionVI src1 src2)); 4884 effect(TEMP tmp, TEMP tmp2); 4885 format %{ "vextracti128_high $tmp,$src2\n\t" 4886 "vpaddd $tmp,$tmp,$src2\n\t" 4887 "pshufd $tmp2,$tmp,0xE\n\t" 4888 "vpaddd $tmp,$tmp,$tmp2\n\t" 4889 "pshufd $tmp2,$tmp,0x1\n\t" 4890 "vpaddd $tmp,$tmp,$tmp2\n\t" 4891 "movd $tmp2,$src1\n\t" 4892 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4893 "movd $dst,$tmp2\t! 
add reduction8I" %} 4894 ins_encode %{ 4895 int vector_len = 0; 4896 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 4897 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 4898 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 4899 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4900 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4901 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4902 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4903 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4904 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4905 %} 4906 ins_pipe( pipe_slow ); 4907 %} 4908 4909 instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 4910 predicate(UseAVX > 2); 4911 match(Set dst (AddReductionVI src1 src2)); 4912 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 4913 format %{ "vextracti64x4_high $tmp3,$src2\n\t" 4914 "vpaddd $tmp3,$tmp3,$src2\n\t" 4915 "vextracti128_high $tmp,$tmp3\n\t" 4916 "vpaddd $tmp,$tmp,$tmp3\n\t" 4917 "pshufd $tmp2,$tmp,0xE\n\t" 4918 "vpaddd $tmp,$tmp,$tmp2\n\t" 4919 "pshufd $tmp2,$tmp,0x1\n\t" 4920 "vpaddd $tmp,$tmp,$tmp2\n\t" 4921 "movd $tmp2,$src1\n\t" 4922 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4923 "movd $dst,$tmp2\t! mul reduction16I" %} 4924 ins_encode %{ 4925 __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); 4926 __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 4927 __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); 4928 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 4929 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 4930 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4931 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4932 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4933 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4934 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4935 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4936 %} 4937 ins_pipe( pipe_slow ); 4938 %} 4939 4940 #ifdef _LP64 4941 instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ 4942 predicate(UseAVX > 2); 4943 match(Set dst (AddReductionVL src1 src2)); 4944 effect(TEMP tmp, TEMP tmp2); 4945 format %{ "pshufd $tmp2,$src2,0xE\n\t" 4946 "vpaddq $tmp,$src2,$tmp2\n\t" 4947 "movdq $tmp2,$src1\n\t" 4948 "vpaddq $tmp2,$tmp,$tmp2\n\t" 4949 "movdq $dst,$tmp2\t! add reduction2L" %} 4950 ins_encode %{ 4951 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 4952 __ vpaddq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 4953 __ movdq($tmp2$$XMMRegister, $src1$$Register); 4954 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4955 __ movdq($dst$$Register, $tmp2$$XMMRegister); 4956 %} 4957 ins_pipe( pipe_slow ); 4958 %} 4959 4960 instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ 4961 predicate(UseAVX > 2); 4962 match(Set dst (AddReductionVL src1 src2)); 4963 effect(TEMP tmp, TEMP tmp2); 4964 format %{ "vextracti128_high $tmp,$src2\n\t" 4965 "vpaddq $tmp2,$tmp,$src2\n\t" 4966 "pshufd $tmp,$tmp2,0xE\n\t" 4967 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4968 "movdq $tmp,$src1\n\t" 4969 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4970 "movdq $dst,$tmp2\t! 
add reduction4L" %} 4971 ins_encode %{ 4972 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 4973 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 4974 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 4975 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4976 __ movdq($tmp$$XMMRegister, $src1$$Register); 4977 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4978 __ movdq($dst$$Register, $tmp2$$XMMRegister); 4979 %} 4980 ins_pipe( pipe_slow ); 4981 %} 4982 4983 instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ 4984 predicate(UseAVX > 2); 4985 match(Set dst (AddReductionVL src1 src2)); 4986 effect(TEMP tmp, TEMP tmp2); 4987 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 4988 "vpaddq $tmp2,$tmp2,$src2\n\t" 4989 "vextracti128_high $tmp,$tmp2\n\t" 4990 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4991 "pshufd $tmp,$tmp2,0xE\n\t" 4992 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4993 "movdq $tmp,$src1\n\t" 4994 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4995 "movdq $dst,$tmp2\t! add reduction8L" %} 4996 ins_encode %{ 4997 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 4998 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 4999 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 5000 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5001 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5002 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5003 __ movdq($tmp$$XMMRegister, $src1$$Register); 5004 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5005 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5006 %} 5007 ins_pipe( pipe_slow ); 5008 %} 5009 #endif 5010 5011 instruct rsadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 5012 predicate(UseSSE >= 1 && UseAVX == 0); 5013 match(Set dst (AddReductionVF dst src2)); 5014 effect(TEMP dst, TEMP tmp); 5015 format %{ "addss $dst,$src2\n\t" 5016 "pshufd $tmp,$src2,0x01\n\t" 5017 "addss $dst,$tmp\t! add reduction2F" %} 5018 ins_encode %{ 5019 __ addss($dst$$XMMRegister, $src2$$XMMRegister); 5020 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5021 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 5022 %} 5023 ins_pipe( pipe_slow ); 5024 %} 5025 5026 instruct rvadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 5027 predicate(UseAVX > 0); 5028 match(Set dst (AddReductionVF dst src2)); 5029 effect(TEMP dst, TEMP tmp); 5030 format %{ "vaddss $dst,$dst,$src2\n\t" 5031 "pshufd $tmp,$src2,0x01\n\t" 5032 "vaddss $dst,$dst,$tmp\t! add reduction2F" %} 5033 ins_encode %{ 5034 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5035 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5036 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5037 %} 5038 ins_pipe( pipe_slow ); 5039 %} 5040 5041 instruct rsadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 5042 predicate(UseSSE >= 1 && UseAVX == 0); 5043 match(Set dst (AddReductionVF dst src2)); 5044 effect(TEMP dst, TEMP tmp); 5045 format %{ "addss $dst,$src2\n\t" 5046 "pshufd $tmp,$src2,0x01\n\t" 5047 "addss $dst,$tmp\n\t" 5048 "pshufd $tmp,$src2,0x02\n\t" 5049 "addss $dst,$tmp\n\t" 5050 "pshufd $tmp,$src2,0x03\n\t" 5051 "addss $dst,$tmp\t! 
add reduction4F" %} 5052 ins_encode %{ 5053 __ addss($dst$$XMMRegister, $src2$$XMMRegister); 5054 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5055 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 5056 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5057 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 5058 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5059 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 5060 %} 5061 ins_pipe( pipe_slow ); 5062 %} 5063 5064 instruct rvadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 5065 predicate(UseAVX > 0); 5066 match(Set dst (AddReductionVF dst src2)); 5067 effect(TEMP tmp, TEMP dst); 5068 format %{ "vaddss $dst,dst,$src2\n\t" 5069 "pshufd $tmp,$src2,0x01\n\t" 5070 "vaddss $dst,$dst,$tmp\n\t" 5071 "pshufd $tmp,$src2,0x02\n\t" 5072 "vaddss $dst,$dst,$tmp\n\t" 5073 "pshufd $tmp,$src2,0x03\n\t" 5074 "vaddss $dst,$dst,$tmp\t! add reduction4F" %} 5075 ins_encode %{ 5076 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5077 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5078 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5079 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5080 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5081 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5082 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5083 %} 5084 ins_pipe( pipe_slow ); 5085 %} 5086 5087 instruct radd8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{ 5088 predicate(UseAVX > 0); 5089 match(Set dst (AddReductionVF dst src2)); 5090 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5091 format %{ "vaddss $dst,$dst,$src2\n\t" 5092 "pshufd $tmp,$src2,0x01\n\t" 5093 "vaddss $dst,$dst,$tmp\n\t" 5094 "pshufd $tmp,$src2,0x02\n\t" 5095 "vaddss $dst,$dst,$tmp\n\t" 5096 "pshufd $tmp,$src2,0x03\n\t" 5097 "vaddss $dst,$dst,$tmp\n\t" 5098 "vextractf128_high $tmp2,$src2\n\t" 5099 "vaddss $dst,$dst,$tmp2\n\t" 5100 "pshufd $tmp,$tmp2,0x01\n\t" 5101 "vaddss $dst,$dst,$tmp\n\t" 5102 "pshufd $tmp,$tmp2,0x02\n\t" 5103 "vaddss $dst,$dst,$tmp\n\t" 5104 "pshufd $tmp,$tmp2,0x03\n\t" 5105 "vaddss $dst,$dst,$tmp\t! 
add reduction8F" %} 5106 ins_encode %{ 5107 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5108 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5109 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5110 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5111 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5112 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5113 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5114 __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5115 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5116 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5117 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5118 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5119 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5120 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5121 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5122 %} 5123 ins_pipe( pipe_slow ); 5124 %} 5125 5126 instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{ 5127 predicate(UseAVX > 2); 5128 match(Set dst (AddReductionVF dst src2)); 5129 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5130 format %{ "vaddss $dst,$dst,$src2\n\t" 5131 "pshufd $tmp,$src2,0x01\n\t" 5132 "vaddss $dst,$dst,$tmp\n\t" 5133 "pshufd $tmp,$src2,0x02\n\t" 5134 "vaddss $dst,$dst,$tmp\n\t" 5135 "pshufd $tmp,$src2,0x03\n\t" 5136 "vaddss $dst,$dst,$tmp\n\t" 5137 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5138 "vaddss $dst,$dst,$tmp2\n\t" 5139 "pshufd $tmp,$tmp2,0x01\n\t" 5140 "vaddss $dst,$dst,$tmp\n\t" 5141 "pshufd $tmp,$tmp2,0x02\n\t" 5142 "vaddss $dst,$dst,$tmp\n\t" 5143 "pshufd $tmp,$tmp2,0x03\n\t" 5144 "vaddss $dst,$dst,$tmp\n\t" 5145 "vextractf32x4 $tmp2,$src2,0x2\n\t" 5146 "vaddss $dst,$dst,$tmp2\n\t" 5147 "pshufd $tmp,$tmp2,0x01\n\t" 5148 "vaddss $dst,$dst,$tmp\n\t" 5149 "pshufd $tmp,$tmp2,0x02\n\t" 5150 "vaddss $dst,$dst,$tmp\n\t" 5151 "pshufd $tmp,$tmp2,0x03\n\t" 5152 "vaddss $dst,$dst,$tmp\n\t" 5153 "vextractf32x4 $tmp2,$src2,0x3\n\t" 5154 "vaddss $dst,$dst,$tmp2\n\t" 5155 "pshufd $tmp,$tmp2,0x01\n\t" 5156 "vaddss $dst,$dst,$tmp\n\t" 5157 "pshufd $tmp,$tmp2,0x02\n\t" 5158 "vaddss $dst,$dst,$tmp\n\t" 5159 "pshufd $tmp,$tmp2,0x03\n\t" 5160 "vaddss $dst,$dst,$tmp\t! 
add reduction16F" %} 5161 ins_encode %{ 5162 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5163 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5164 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5165 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5166 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5167 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5168 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5169 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5170 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5171 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5172 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5173 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5174 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5175 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5176 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5177 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 5178 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5179 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5180 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5181 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5182 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5183 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5184 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5185 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 5186 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5187 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5188 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5189 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5190 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5191 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5192 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5193 %} 5194 ins_pipe( pipe_slow ); 5195 %} 5196 5197 instruct rsadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 5198 predicate(UseSSE >= 1 && UseAVX == 0); 5199 match(Set dst (AddReductionVD dst src2)); 5200 effect(TEMP tmp, TEMP dst); 5201 format %{ "addsd $dst,$src2\n\t" 5202 "pshufd $tmp,$src2,0xE\n\t" 5203 "addsd $dst,$tmp\t! add reduction2D" %} 5204 ins_encode %{ 5205 __ addsd($dst$$XMMRegister, $src2$$XMMRegister); 5206 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5207 __ addsd($dst$$XMMRegister, $tmp$$XMMRegister); 5208 %} 5209 ins_pipe( pipe_slow ); 5210 %} 5211 5212 instruct rvadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 5213 predicate(UseAVX > 0); 5214 match(Set dst (AddReductionVD dst src2)); 5215 effect(TEMP tmp, TEMP dst); 5216 format %{ "vaddsd $dst,$dst,$src2\n\t" 5217 "pshufd $tmp,$src2,0xE\n\t" 5218 "vaddsd $dst,$dst,$tmp\t! 
add reduction2D" %} 5219 ins_encode %{ 5220 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5221 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5222 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5223 %} 5224 ins_pipe( pipe_slow ); 5225 %} 5226 5227 instruct rvadd4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{ 5228 predicate(UseAVX > 0); 5229 match(Set dst (AddReductionVD dst src2)); 5230 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5231 format %{ "vaddsd $dst,$dst,$src2\n\t" 5232 "pshufd $tmp,$src2,0xE\n\t" 5233 "vaddsd $dst,$dst,$tmp\n\t" 5234 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5235 "vaddsd $dst,$dst,$tmp2\n\t" 5236 "pshufd $tmp,$tmp2,0xE\n\t" 5237 "vaddsd $dst,$dst,$tmp\t! add reduction4D" %} 5238 ins_encode %{ 5239 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5240 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5241 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5242 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5243 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5244 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5245 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5246 %} 5247 ins_pipe( pipe_slow ); 5248 %} 5249 5250 instruct rvadd8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{ 5251 predicate(UseAVX > 2); 5252 match(Set dst (AddReductionVD dst src2)); 5253 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5254 format %{ "vaddsd $dst,$dst,$src2\n\t" 5255 "pshufd $tmp,$src2,0xE\n\t" 5256 "vaddsd $dst,$dst,$tmp\n\t" 5257 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5258 "vaddsd $dst,$dst,$tmp2\n\t" 5259 "pshufd $tmp,$tmp2,0xE\n\t" 5260 "vaddsd $dst,$dst,$tmp\n\t" 5261 "vextractf32x4 $tmp2,$src2,0x2\n\t" 5262 "vaddsd $dst,$dst,$tmp2\n\t" 5263 "pshufd $tmp,$tmp2,0xE\n\t" 5264 "vaddsd $dst,$dst,$tmp\n\t" 5265 "vextractf32x4 $tmp2,$src2,0x3\n\t" 5266 "vaddsd $dst,$dst,$tmp2\n\t" 5267 "pshufd $tmp,$tmp2,0xE\n\t" 5268 "vaddsd $dst,$dst,$tmp\t! add reduction8D" %} 5269 ins_encode %{ 5270 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5271 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5272 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5273 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5274 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5275 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5276 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5277 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 5278 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5279 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5280 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5281 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 5282 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5283 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5284 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5285 %} 5286 ins_pipe( pipe_slow ); 5287 %} 5288 5289 instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 5290 predicate(UseSSE > 3 && UseAVX == 0); 5291 match(Set dst (MulReductionVI src1 src2)); 5292 effect(TEMP tmp, TEMP tmp2); 5293 format %{ "pshufd $tmp2,$src2,0x1\n\t" 5294 "pmulld $tmp2,$src2\n\t" 5295 "movd $tmp,$src1\n\t" 5296 "pmulld $tmp2,$tmp\n\t" 5297 "movd $dst,$tmp2\t! 
mul reduction2I" %} 5298 ins_encode %{ 5299 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5300 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 5301 __ movdl($tmp$$XMMRegister, $src1$$Register); 5302 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5303 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5304 %} 5305 ins_pipe( pipe_slow ); 5306 %} 5307 5308 instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 5309 predicate(UseAVX > 0); 5310 match(Set dst (MulReductionVI src1 src2)); 5311 effect(TEMP tmp, TEMP tmp2); 5312 format %{ "pshufd $tmp2,$src2,0x1\n\t" 5313 "vpmulld $tmp,$src2,$tmp2\n\t" 5314 "movd $tmp2,$src1\n\t" 5315 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5316 "movd $dst,$tmp2\t! mul reduction2I" %} 5317 ins_encode %{ 5318 int vector_len = 0; 5319 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5320 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5321 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5322 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5323 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5324 %} 5325 ins_pipe( pipe_slow ); 5326 %} 5327 5328 instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 5329 predicate(UseSSE > 3 && UseAVX == 0); 5330 match(Set dst (MulReductionVI src1 src2)); 5331 effect(TEMP tmp, TEMP tmp2); 5332 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5333 "pmulld $tmp2,$src2\n\t" 5334 "pshufd $tmp,$tmp2,0x1\n\t" 5335 "pmulld $tmp2,$tmp\n\t" 5336 "movd $tmp,$src1\n\t" 5337 "pmulld $tmp2,$tmp\n\t" 5338 "movd $dst,$tmp2\t! mul reduction4I" %} 5339 ins_encode %{ 5340 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5341 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 5342 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1); 5343 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5344 __ movdl($tmp$$XMMRegister, $src1$$Register); 5345 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5346 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5347 %} 5348 ins_pipe( pipe_slow ); 5349 %} 5350 5351 instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 5352 predicate(UseAVX > 0); 5353 match(Set dst (MulReductionVI src1 src2)); 5354 effect(TEMP tmp, TEMP tmp2); 5355 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5356 "vpmulld $tmp,$src2,$tmp2\n\t" 5357 "pshufd $tmp2,$tmp,0x1\n\t" 5358 "vpmulld $tmp,$tmp,$tmp2\n\t" 5359 "movd $tmp2,$src1\n\t" 5360 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5361 "movd $dst,$tmp2\t! 
mul reduction4I" %} 5362 ins_encode %{ 5363 int vector_len = 0; 5364 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5365 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5366 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5367 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5368 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5369 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5370 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5371 %} 5372 ins_pipe( pipe_slow ); 5373 %} 5374 5375 instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 5376 predicate(UseAVX > 0); 5377 match(Set dst (MulReductionVI src1 src2)); 5378 effect(TEMP tmp, TEMP tmp2); 5379 format %{ "vextracti128_high $tmp,$src2\n\t" 5380 "vpmulld $tmp,$tmp,$src2\n\t" 5381 "pshufd $tmp2,$tmp,0xE\n\t" 5382 "vpmulld $tmp,$tmp,$tmp2\n\t" 5383 "pshufd $tmp2,$tmp,0x1\n\t" 5384 "vpmulld $tmp,$tmp,$tmp2\n\t" 5385 "movd $tmp2,$src1\n\t" 5386 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5387 "movd $dst,$tmp2\t! mul reduction8I" %} 5388 ins_encode %{ 5389 int vector_len = 0; 5390 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5391 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 5392 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5393 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5394 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5395 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5396 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5397 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5398 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5399 %} 5400 ins_pipe( pipe_slow ); 5401 %} 5402 5403 instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 5404 predicate(UseAVX > 2); 5405 match(Set dst (MulReductionVI src1 src2)); 5406 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5407 format %{ "vextracti64x4_high $tmp3,$src2\n\t" 5408 "vpmulld $tmp3,$tmp3,$src2\n\t" 5409 "vextracti128_high $tmp,$tmp3\n\t" 5410 "vpmulld $tmp,$tmp,$src2\n\t" 5411 "pshufd $tmp2,$tmp,0xE\n\t" 5412 "vpmulld $tmp,$tmp,$tmp2\n\t" 5413 "pshufd $tmp2,$tmp,0x1\n\t" 5414 "vpmulld $tmp,$tmp,$tmp2\n\t" 5415 "movd $tmp2,$src1\n\t" 5416 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5417 "movd $dst,$tmp2\t! 
mul reduction16I" %} 5418 ins_encode %{ 5419 __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); 5420 __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 5421 __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); 5422 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 5423 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5424 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5425 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5426 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5427 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5428 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5429 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5430 %} 5431 ins_pipe( pipe_slow ); 5432 %} 5433 5434 #ifdef _LP64 5435 instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ 5436 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5437 match(Set dst (MulReductionVL src1 src2)); 5438 effect(TEMP tmp, TEMP tmp2); 5439 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5440 "vpmullq $tmp,$src2,$tmp2\n\t" 5441 "movdq $tmp2,$src1\n\t" 5442 "vpmullq $tmp2,$tmp,$tmp2\n\t" 5443 "movdq $dst,$tmp2\t! mul reduction2L" %} 5444 ins_encode %{ 5445 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5446 __ vpmullq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 5447 __ movdq($tmp2$$XMMRegister, $src1$$Register); 5448 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5449 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5450 %} 5451 ins_pipe( pipe_slow ); 5452 %} 5453 5454 instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ 5455 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5456 match(Set dst (MulReductionVL src1 src2)); 5457 effect(TEMP tmp, TEMP tmp2); 5458 format %{ "vextracti128_high $tmp,$src2\n\t" 5459 "vpmullq $tmp2,$tmp,$src2\n\t" 5460 "pshufd $tmp,$tmp2,0xE\n\t" 5461 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5462 "movdq $tmp,$src1\n\t" 5463 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5464 "movdq $dst,$tmp2\t! mul reduction4L" %} 5465 ins_encode %{ 5466 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5467 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 5468 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5469 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5470 __ movdq($tmp$$XMMRegister, $src1$$Register); 5471 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5472 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5473 %} 5474 ins_pipe( pipe_slow ); 5475 %} 5476 5477 instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ 5478 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5479 match(Set dst (MulReductionVL src1 src2)); 5480 effect(TEMP tmp, TEMP tmp2); 5481 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 5482 "vpmullq $tmp2,$tmp2,$src2\n\t" 5483 "vextracti128_high $tmp,$tmp2\n\t" 5484 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5485 "pshufd $tmp,$tmp2,0xE\n\t" 5486 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5487 "movdq $tmp,$src1\n\t" 5488 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5489 "movdq $dst,$tmp2\t! 
mul reduction8L" %} 5490 ins_encode %{ 5491 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5492 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 5493 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 5494 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5495 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5496 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5497 __ movdq($tmp$$XMMRegister, $src1$$Register); 5498 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5499 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5500 %} 5501 ins_pipe( pipe_slow ); 5502 %} 5503 #endif 5504 5505 instruct rsmul2F_reduction(regF dst, vecD src2, regF tmp) %{ 5506 predicate(UseSSE >= 1 && UseAVX == 0); 5507 match(Set dst (MulReductionVF dst src2)); 5508 effect(TEMP dst, TEMP tmp); 5509 format %{ "mulss $dst,$src2\n\t" 5510 "pshufd $tmp,$src2,0x01\n\t" 5511 "mulss $dst,$tmp\t! mul reduction2F" %} 5512 ins_encode %{ 5513 __ mulss($dst$$XMMRegister, $src2$$XMMRegister); 5514 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5515 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5516 %} 5517 ins_pipe( pipe_slow ); 5518 %} 5519 5520 instruct rvmul2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 5521 predicate(UseAVX > 0); 5522 match(Set dst (MulReductionVF dst src2)); 5523 effect(TEMP tmp, TEMP dst); 5524 format %{ "vmulss $dst,$dst,$src2\n\t" 5525 "pshufd $tmp,$src2,0x01\n\t" 5526 "vmulss $dst,$dst,$tmp\t! mul reduction2F" %} 5527 ins_encode %{ 5528 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5529 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5530 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5531 %} 5532 ins_pipe( pipe_slow ); 5533 %} 5534 5535 instruct rsmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 5536 predicate(UseSSE >= 1 && UseAVX == 0); 5537 match(Set dst (MulReductionVF dst src2)); 5538 effect(TEMP dst, TEMP tmp); 5539 format %{ "mulss $dst,$src2\n\t" 5540 "pshufd $tmp,$src2,0x01\n\t" 5541 "mulss $dst,$tmp\n\t" 5542 "pshufd $tmp,$src2,0x02\n\t" 5543 "mulss $dst,$tmp\n\t" 5544 "pshufd $tmp,$src2,0x03\n\t" 5545 "mulss $dst,$tmp\t! mul reduction4F" %} 5546 ins_encode %{ 5547 __ mulss($dst$$XMMRegister, $src2$$XMMRegister); 5548 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5549 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5550 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5551 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5552 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5553 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5554 %} 5555 ins_pipe( pipe_slow ); 5556 %} 5557 5558 instruct rvmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 5559 predicate(UseAVX > 0); 5560 match(Set dst (MulReductionVF dst src2)); 5561 effect(TEMP tmp, TEMP dst); 5562 format %{ "vmulss $dst,$dst,$src2\n\t" 5563 "pshufd $tmp,$src2,0x01\n\t" 5564 "vmulss $dst,$dst,$tmp\n\t" 5565 "pshufd $tmp,$src2,0x02\n\t" 5566 "vmulss $dst,$dst,$tmp\n\t" 5567 "pshufd $tmp,$src2,0x03\n\t" 5568 "vmulss $dst,$dst,$tmp\t! 

instruct rsmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "mulss $dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "mulss $dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "mulss $dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "mulss $dst,$tmp\t! mul reduction4F" %}
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vmulss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\t! mul reduction4F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "vextractf128_high $tmp2,$src2\n\t"
            "vmulss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\t! mul reduction8F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x1\n\t"
            "vmulss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x2\n\t"
            "vmulss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x3\n\t"
            "vmulss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\t! mul reduction16F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "mulsd $dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "mulsd $dst,$tmp\t! mul reduction2D" %}
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vmulsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\t! mul reduction2D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf128_high $tmp2,$src2\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\t! mul reduction4D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x1\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x2\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x3\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\t! mul reduction8D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
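
// Taken together, the reduction rules above use two shapes. The integer/long
// rules fold with a halving ladder -- extract the upper half, multiply it
// element-wise onto the lower half, and repeat until one lane is left --
// which takes O(log n) multiplies. The FP rules instead multiply the lanes
// into $dst one at a time, in lane order, presumably to preserve the strict
// left-to-right evaluation order the Java language requires for FP. A sketch
// of the integer ladder in plain C (illustrative, not part of this file):
//
//   int mul_reduce_int(int acc, int v[], int n) {       // n a power of two
//     for (int stride = n / 2; stride >= 1; stride /= 2)
//       for (int i = 0; i < stride; i++)
//         v[i] *= v[i + stride];                        // fold upper half
//     return acc * v[0];                                // combine with scalar
//   }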

// ====================VECTOR ARITHMETIC=======================================

// --------------------------------- ADD --------------------------------------

// Bytes vector add
instruct vadd4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed4B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb $dst,$dst,$src2\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
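
// The packed4B family above is the template for every integral add/sub rule
// that follows; each size comes in up to three flavors, selected by predicate:
//   (none)               UseAVX == 0              two-operand SSE form, dst is
//                                                 both a source and the dest
//   _reg_avx / _mem_avx  supports_avxonly()       three-operand VEX encoding
//   _reg_evex/_mem_evex  supports_avx512bw()      three-operand EVEX encoding
//   *_evex_special       supports_avx512nobw()    AVX-512 without BW, where
//                                                 byte/word ops fall back to a
//                                                 destructive dst-as-source
//                                                 form with a TEMP operand
// The _mem variants fold a LoadVector directly into the second operand. The
// vector_len argument handed to the assembler selects the encoded width:
// 0 = 128-bit, 1 = 256-bit, 2 = 512-bit.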

instruct vadd8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed8B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb $dst,$dst,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed16B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb $dst,$dst,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb $dst,$dst,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
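
// 512-bit byte adds exist only under AVX512BW (EVEX byte/word operations need
// the BW extension), so packed64B has just the two EVEX rules above with
// vector_len = 2 and no avx-only or no-BW variant.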

// Shorts/Chars vector add
instruct vadd2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed2S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed4S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed8S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
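
// Shorts and chars share these AddVS rules: both are 16-bit lanes fed to
// paddw/vpaddw, and two's-complement addition is identical for the signed and
// unsigned views. A C-like sketch of the kind of loop the superword pass
// reduces to these rules (illustrative, assuming auto-vectorization applies):
//
//   short a[N], b[N], c[N];
//   for (int i = 0; i < N; i++)
//     a[i] = (short)(b[i] + c[i]);   // -> AddVS nodes -> paddw/vpaddw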

// Integers vector add
instruct vadd2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed2I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed4I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
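
// Dword adds are unaffected by the BW split, so the int rules need only the
// plain UseAVX ladder: UseAVX > 0 gives the 128-bit vpaddd, UseAVX > 1 (AVX2)
// the 256-bit form, and UseAVX > 2 the 512-bit EVEX form.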

// Longs vector add
instruct vadd2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL dst src));
  format %{ "paddq $dst,$src\t! add packed2L" %}
  ins_encode %{
    __ paddq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
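
// Unlike the 64-bit multiply (which needed AVX-512DQ in the reductions above),
// paddq/vpaddq exist at every width, so AddVL follows the same simple UseAVX
// ladder as AddVI.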

// Floats vector add
instruct vadd2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed2F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed4F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
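
// FP packed adds use addps/vaddps. 256-bit FP arithmetic is already part of
// AVX1, which is why vadd8F_reg asks only for UseAVX > 0 while the 256-bit
// integer rule vadd8I_reg above needed UseAVX > 1 (AVX2).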

// Doubles vector add
instruct vadd2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD dst src));
  format %{ "addpd $dst,$src\t! add packed2D" %}
  ins_encode %{
    __ addpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
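
// The SUB section below mirrors the ADD rules one-for-one, swapping
// paddX/vpaddX for psubX/vpsubX; operand roles are otherwise identical,
// though subtraction is not commutative, so dst/src order matters.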

// --------------------------------- SUB --------------------------------------

// Bytes vector sub
instruct vsub4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed4B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed8B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed16B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
sub packed2S" %} 7162 ins_encode %{ 7163 int vector_len = 0; 7164 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7165 %} 7166 ins_pipe( pipe_slow ); 7167 %} 7168 7169 instruct vsub2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{ 7170 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 7171 match(Set dst (SubVS dst src2)); 7172 effect(TEMP src1); 7173 format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %} 7174 ins_encode %{ 7175 int vector_len = 0; 7176 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7177 %} 7178 ins_pipe( pipe_slow ); 7179 %} 7180 7181 instruct vsub2S_mem_avx(vecS dst, vecS src, memory mem) %{ 7182 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 7183 match(Set dst (SubVS src (LoadVector mem))); 7184 format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %} 7185 ins_encode %{ 7186 int vector_len = 0; 7187 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7188 %} 7189 ins_pipe( pipe_slow ); 7190 %} 7191 7192 instruct vsub2S_mem_evex(vecS dst, vecS src, memory mem) %{ 7193 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 7194 match(Set dst (SubVS src (LoadVector mem))); 7195 format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %} 7196 ins_encode %{ 7197 int vector_len = 0; 7198 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7199 %} 7200 ins_pipe( pipe_slow ); 7201 %} 7202 7203 instruct vsub2S_mem_evex_special(vecS dst, vecS src, memory mem) %{ 7204 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 7205 match(Set dst (SubVS dst (LoadVector mem))); 7206 effect(TEMP src); 7207 format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %} 7208 ins_encode %{ 7209 int vector_len = 0; 7210 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7211 %} 7212 ins_pipe( pipe_slow ); 7213 %} 7214 7215 instruct vsub4S(vecD dst, vecD src) %{ 7216 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 7217 match(Set dst (SubVS dst src)); 7218 format %{ "psubw $dst,$src\t! sub packed4S" %} 7219 ins_encode %{ 7220 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 7221 %} 7222 ins_pipe( pipe_slow ); 7223 %} 7224 7225 instruct vsub4S_reg_avx(vecD dst, vecD src1, vecD src2) %{ 7226 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 7227 match(Set dst (SubVS src1 src2)); 7228 format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %} 7229 ins_encode %{ 7230 int vector_len = 0; 7231 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7232 %} 7233 ins_pipe( pipe_slow ); 7234 %} 7235 7236 instruct vsub4S_reg_evex(vecD dst, vecD src1, vecD src2) %{ 7237 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 7238 match(Set dst (SubVS src1 src2)); 7239 format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %} 7240 ins_encode %{ 7241 int vector_len = 0; 7242 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7243 %} 7244 ins_pipe( pipe_slow ); 7245 %} 7246 7247 instruct vsub4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ 7248 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 7249 match(Set dst (SubVS dst src2)); 7250 effect(TEMP src1); 7251 format %{ "vpsubw $dst,$src1,$src2\t! 
sub packed4S" %} 7252 ins_encode %{ 7253 int vector_len = 0; 7254 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7255 %} 7256 ins_pipe( pipe_slow ); 7257 %} 7258 7259 instruct vsub4S_mem_avx(vecD dst, vecD src, memory mem) %{ 7260 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 7261 match(Set dst (SubVS src (LoadVector mem))); 7262 format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %} 7263 ins_encode %{ 7264 int vector_len = 0; 7265 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7266 %} 7267 ins_pipe( pipe_slow ); 7268 %} 7269 7270 instruct vsub4S_mem_evex(vecD dst, vecD src, memory mem) %{ 7271 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 7272 match(Set dst (SubVS src (LoadVector mem))); 7273 format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %} 7274 ins_encode %{ 7275 int vector_len = 0; 7276 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7277 %} 7278 ins_pipe( pipe_slow ); 7279 %} 7280 7281 instruct vsub4S_mem_evex_special(vecD dst, vecD src, memory mem) %{ 7282 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 7283 match(Set dst (SubVS dst (LoadVector mem))); 7284 effect(TEMP src); 7285 format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %} 7286 ins_encode %{ 7287 int vector_len = 0; 7288 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7289 %} 7290 ins_pipe( pipe_slow ); 7291 %} 7292 7293 instruct vsub8S(vecX dst, vecX src) %{ 7294 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 7295 match(Set dst (SubVS dst src)); 7296 format %{ "psubw $dst,$src\t! sub packed8S" %} 7297 ins_encode %{ 7298 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 7299 %} 7300 ins_pipe( pipe_slow ); 7301 %} 7302 7303 instruct vsub8S_reg_avx(vecX dst, vecX src1, vecX src2) %{ 7304 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 7305 match(Set dst (SubVS src1 src2)); 7306 format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} 7307 ins_encode %{ 7308 int vector_len = 0; 7309 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7310 %} 7311 ins_pipe( pipe_slow ); 7312 %} 7313 7314 instruct vsub8S_reg_evex(vecX dst, vecX src1, vecX src2) %{ 7315 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 7316 match(Set dst (SubVS src1 src2)); 7317 format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} 7318 ins_encode %{ 7319 int vector_len = 0; 7320 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7321 %} 7322 ins_pipe( pipe_slow ); 7323 %} 7324 7325 instruct vsub8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ 7326 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 7327 match(Set dst (SubVS dst src2)); 7328 effect(TEMP src1); 7329 format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} 7330 ins_encode %{ 7331 int vector_len = 0; 7332 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7333 %} 7334 ins_pipe( pipe_slow ); 7335 %} 7336 7337 instruct vsub8S_mem_avx(vecX dst, vecX src, memory mem) %{ 7338 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 7339 match(Set dst (SubVS src (LoadVector mem))); 7340 format %{ "vpsubw $dst,$src,$mem\t! 
sub packed8S" %} 7341 ins_encode %{ 7342 int vector_len = 0; 7343 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7344 %} 7345 ins_pipe( pipe_slow ); 7346 %} 7347 7348 instruct vsub8S_mem_evex(vecX dst, vecX src, memory mem) %{ 7349 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 7350 match(Set dst (SubVS src (LoadVector mem))); 7351 format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %} 7352 ins_encode %{ 7353 int vector_len = 0; 7354 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7355 %} 7356 ins_pipe( pipe_slow ); 7357 %} 7358 7359 instruct vsub8S_mem_evex_special(vecX dst, vecX src, memory mem) %{ 7360 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 7361 match(Set dst (SubVS dst (LoadVector mem))); 7362 effect(TEMP src); 7363 format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %} 7364 ins_encode %{ 7365 int vector_len = 0; 7366 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7367 %} 7368 ins_pipe( pipe_slow ); 7369 %} 7370 7371 instruct vsub16S_reg_avx(vecY dst, vecY src1, vecY src2) %{ 7372 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 7373 match(Set dst (SubVS src1 src2)); 7374 format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} 7375 ins_encode %{ 7376 int vector_len = 1; 7377 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7378 %} 7379 ins_pipe( pipe_slow ); 7380 %} 7381 7382 instruct vsub16S_reg_evex(vecY dst, vecY src1, vecY src2) %{ 7383 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 7384 match(Set dst (SubVS src1 src2)); 7385 format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} 7386 ins_encode %{ 7387 int vector_len = 1; 7388 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7389 %} 7390 ins_pipe( pipe_slow ); 7391 %} 7392 7393 instruct vsub16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ 7394 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 7395 match(Set dst (SubVS dst src2)); 7396 effect(TEMP src1); 7397 format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} 7398 ins_encode %{ 7399 int vector_len = 1; 7400 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7401 %} 7402 ins_pipe( pipe_slow ); 7403 %} 7404 7405 instruct vsub16S_mem_avx(vecY dst, vecY src, memory mem) %{ 7406 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 7407 match(Set dst (SubVS src (LoadVector mem))); 7408 format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %} 7409 ins_encode %{ 7410 int vector_len = 1; 7411 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7412 %} 7413 ins_pipe( pipe_slow ); 7414 %} 7415 7416 instruct vsub16S_mem_evex(vecY dst, vecY src, memory mem) %{ 7417 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 7418 match(Set dst (SubVS src (LoadVector mem))); 7419 format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %} 7420 ins_encode %{ 7421 int vector_len = 1; 7422 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7423 %} 7424 ins_pipe( pipe_slow ); 7425 %} 7426 7427 instruct vsub16S_mem_evex_special(vecY dst, vecY src, memory mem) %{ 7428 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 7429 match(Set dst (SubVS dst (LoadVector mem))); 7430 effect(TEMP src); 7431 format %{ "vpsubw $dst,$src,$mem\t! 
sub packed16S" %} 7432 ins_encode %{ 7433 int vector_len = 1; 7434 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7435 %} 7436 ins_pipe( pipe_slow ); 7437 %} 7438 7439 instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7440 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 7441 match(Set dst (SubVS src1 src2)); 7442 format %{ "vpsubw $dst,$src1,$src2\t! sub packed32S" %} 7443 ins_encode %{ 7444 int vector_len = 2; 7445 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7446 %} 7447 ins_pipe( pipe_slow ); 7448 %} 7449 7450 instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{ 7451 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 7452 match(Set dst (SubVS src (LoadVector mem))); 7453 format %{ "vpsubw $dst,$src,$mem\t! sub packed32S" %} 7454 ins_encode %{ 7455 int vector_len = 2; 7456 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7457 %} 7458 ins_pipe( pipe_slow ); 7459 %} 7460 7461 // Integers vector sub 7462 instruct vsub2I(vecD dst, vecD src) %{ 7463 predicate(n->as_Vector()->length() == 2); 7464 match(Set dst (SubVI dst src)); 7465 format %{ "psubd $dst,$src\t! sub packed2I" %} 7466 ins_encode %{ 7467 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 7468 %} 7469 ins_pipe( pipe_slow ); 7470 %} 7471 7472 instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{ 7473 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7474 match(Set dst (SubVI src1 src2)); 7475 format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %} 7476 ins_encode %{ 7477 int vector_len = 0; 7478 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7479 %} 7480 ins_pipe( pipe_slow ); 7481 %} 7482 7483 instruct vsub2I_mem(vecD dst, vecD src, memory mem) %{ 7484 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7485 match(Set dst (SubVI src (LoadVector mem))); 7486 format %{ "vpsubd $dst,$src,$mem\t! sub packed2I" %} 7487 ins_encode %{ 7488 int vector_len = 0; 7489 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7490 %} 7491 ins_pipe( pipe_slow ); 7492 %} 7493 7494 instruct vsub4I(vecX dst, vecX src) %{ 7495 predicate(n->as_Vector()->length() == 4); 7496 match(Set dst (SubVI dst src)); 7497 format %{ "psubd $dst,$src\t! sub packed4I" %} 7498 ins_encode %{ 7499 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 7500 %} 7501 ins_pipe( pipe_slow ); 7502 %} 7503 7504 instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{ 7505 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7506 match(Set dst (SubVI src1 src2)); 7507 format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %} 7508 ins_encode %{ 7509 int vector_len = 0; 7510 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7511 %} 7512 ins_pipe( pipe_slow ); 7513 %} 7514 7515 instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{ 7516 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7517 match(Set dst (SubVI src (LoadVector mem))); 7518 format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %} 7519 ins_encode %{ 7520 int vector_len = 0; 7521 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7522 %} 7523 ins_pipe( pipe_slow ); 7524 %} 7525 7526 instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{ 7527 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 7528 match(Set dst (SubVI src1 src2)); 7529 format %{ "vpsubd $dst,$src1,$src2\t! 
sub packed8I" %} 7530 ins_encode %{ 7531 int vector_len = 1; 7532 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7533 %} 7534 ins_pipe( pipe_slow ); 7535 %} 7536 7537 instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{ 7538 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 7539 match(Set dst (SubVI src (LoadVector mem))); 7540 format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %} 7541 ins_encode %{ 7542 int vector_len = 1; 7543 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7544 %} 7545 ins_pipe( pipe_slow ); 7546 %} 7547 7548 instruct vsub16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7549 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7550 match(Set dst (SubVI src1 src2)); 7551 format %{ "vpsubd $dst,$src1,$src2\t! sub packed16I" %} 7552 ins_encode %{ 7553 int vector_len = 2; 7554 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7555 %} 7556 ins_pipe( pipe_slow ); 7557 %} 7558 7559 instruct vsub16I_mem(vecZ dst, vecZ src, memory mem) %{ 7560 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7561 match(Set dst (SubVI src (LoadVector mem))); 7562 format %{ "vpsubd $dst,$src,$mem\t! sub packed16I" %} 7563 ins_encode %{ 7564 int vector_len = 2; 7565 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7566 %} 7567 ins_pipe( pipe_slow ); 7568 %} 7569 7570 // Longs vector sub 7571 instruct vsub2L(vecX dst, vecX src) %{ 7572 predicate(n->as_Vector()->length() == 2); 7573 match(Set dst (SubVL dst src)); 7574 format %{ "psubq $dst,$src\t! sub packed2L" %} 7575 ins_encode %{ 7576 __ psubq($dst$$XMMRegister, $src$$XMMRegister); 7577 %} 7578 ins_pipe( pipe_slow ); 7579 %} 7580 7581 instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{ 7582 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7583 match(Set dst (SubVL src1 src2)); 7584 format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %} 7585 ins_encode %{ 7586 int vector_len = 0; 7587 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7588 %} 7589 ins_pipe( pipe_slow ); 7590 %} 7591 7592 instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{ 7593 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7594 match(Set dst (SubVL src (LoadVector mem))); 7595 format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %} 7596 ins_encode %{ 7597 int vector_len = 0; 7598 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7599 %} 7600 ins_pipe( pipe_slow ); 7601 %} 7602 7603 instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{ 7604 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 7605 match(Set dst (SubVL src1 src2)); 7606 format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %} 7607 ins_encode %{ 7608 int vector_len = 1; 7609 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7610 %} 7611 ins_pipe( pipe_slow ); 7612 %} 7613 7614 instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{ 7615 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 7616 match(Set dst (SubVL src (LoadVector mem))); 7617 format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %} 7618 ins_encode %{ 7619 int vector_len = 1; 7620 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7621 %} 7622 ins_pipe( pipe_slow ); 7623 %} 7624 7625 instruct vsub8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7626 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 7627 match(Set dst (SubVL src1 src2)); 7628 format %{ "vpsubq $dst,$src1,$src2\t! 
sub packed8L" %} 7629 ins_encode %{ 7630 int vector_len = 2; 7631 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7632 %} 7633 ins_pipe( pipe_slow ); 7634 %} 7635 7636 instruct vsub8L_mem(vecZ dst, vecZ src, memory mem) %{ 7637 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 7638 match(Set dst (SubVL src (LoadVector mem))); 7639 format %{ "vpsubq $dst,$src,$mem\t! sub packed8L" %} 7640 ins_encode %{ 7641 int vector_len = 2; 7642 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7643 %} 7644 ins_pipe( pipe_slow ); 7645 %} 7646 7647 // Floats vector sub 7648 instruct vsub2F(vecD dst, vecD src) %{ 7649 predicate(n->as_Vector()->length() == 2); 7650 match(Set dst (SubVF dst src)); 7651 format %{ "subps $dst,$src\t! sub packed2F" %} 7652 ins_encode %{ 7653 __ subps($dst$$XMMRegister, $src$$XMMRegister); 7654 %} 7655 ins_pipe( pipe_slow ); 7656 %} 7657 7658 instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{ 7659 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7660 match(Set dst (SubVF src1 src2)); 7661 format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %} 7662 ins_encode %{ 7663 int vector_len = 0; 7664 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7665 %} 7666 ins_pipe( pipe_slow ); 7667 %} 7668 7669 instruct vsub2F_mem(vecD dst, vecD src, memory mem) %{ 7670 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7671 match(Set dst (SubVF src (LoadVector mem))); 7672 format %{ "vsubps $dst,$src,$mem\t! sub packed2F" %} 7673 ins_encode %{ 7674 int vector_len = 0; 7675 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7676 %} 7677 ins_pipe( pipe_slow ); 7678 %} 7679 7680 instruct vsub4F(vecX dst, vecX src) %{ 7681 predicate(n->as_Vector()->length() == 4); 7682 match(Set dst (SubVF dst src)); 7683 format %{ "subps $dst,$src\t! sub packed4F" %} 7684 ins_encode %{ 7685 __ subps($dst$$XMMRegister, $src$$XMMRegister); 7686 %} 7687 ins_pipe( pipe_slow ); 7688 %} 7689 7690 instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{ 7691 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7692 match(Set dst (SubVF src1 src2)); 7693 format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %} 7694 ins_encode %{ 7695 int vector_len = 0; 7696 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7697 %} 7698 ins_pipe( pipe_slow ); 7699 %} 7700 7701 instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{ 7702 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7703 match(Set dst (SubVF src (LoadVector mem))); 7704 format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %} 7705 ins_encode %{ 7706 int vector_len = 0; 7707 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7708 %} 7709 ins_pipe( pipe_slow ); 7710 %} 7711 7712 instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{ 7713 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7714 match(Set dst (SubVF src1 src2)); 7715 format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %} 7716 ins_encode %{ 7717 int vector_len = 1; 7718 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7719 %} 7720 ins_pipe( pipe_slow ); 7721 %} 7722 7723 instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{ 7724 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7725 match(Set dst (SubVF src (LoadVector mem))); 7726 format %{ "vsubps $dst,$src,$mem\t! 
sub packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector sub
instruct vsub2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVD dst src));
  format %{ "subpd $dst,$src\t! sub packed2D" %}
  ins_encode %{
    __ subpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
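// Note: the byte/short (B/S) vector sub rules above and the vector mul rules
// below follow a common three-way split, selected by predicate:
//   *_avx          - VM_Version::supports_avxonly():    AVX1/2, no AVX-512
//   *_evex         - VM_Version::supports_avx512bw():   AVX-512 with BW
//   *_evex_special - VM_Version::supports_avx512nobw(): AVX-512 without BW;
//                    the rule falls back to the two-operand dst/src shape
//                    and reserves an extra TEMP register.
// The int/long/float/double rules need no such split and key off UseAVX alone.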
// --------------------------------- MUL --------------------------------------

// Shorts/Chars vector mul
instruct vmul2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed2S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS dst src2));
  effect(TEMP src1);
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t!
mul packed4S" %} 7918 ins_encode %{ 7919 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 7920 %} 7921 ins_pipe( pipe_slow ); 7922 %} 7923 7924 instruct vmul4S_reg_avx(vecD dst, vecD src1, vecD src2) %{ 7925 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 7926 match(Set dst (MulVS src1 src2)); 7927 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 7928 ins_encode %{ 7929 int vector_len = 0; 7930 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7931 %} 7932 ins_pipe( pipe_slow ); 7933 %} 7934 7935 instruct vmul4S_reg_evex(vecD dst, vecD src1, vecD src2) %{ 7936 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 7937 match(Set dst (MulVS src1 src2)); 7938 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 7939 ins_encode %{ 7940 int vector_len = 0; 7941 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7942 %} 7943 ins_pipe( pipe_slow ); 7944 %} 7945 7946 instruct vmul4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ 7947 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 7948 match(Set dst (MulVS dst src2)); 7949 effect(TEMP src1); 7950 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 7951 ins_encode %{ 7952 int vector_len = 0; 7953 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7954 %} 7955 ins_pipe( pipe_slow ); 7956 %} 7957 7958 instruct vmul4S_mem_avx(vecD dst, vecD src, memory mem) %{ 7959 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 7960 match(Set dst (MulVS src (LoadVector mem))); 7961 format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} 7962 ins_encode %{ 7963 int vector_len = 0; 7964 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7965 %} 7966 ins_pipe( pipe_slow ); 7967 %} 7968 7969 instruct vmul4S_mem_evex(vecD dst, vecD src, memory mem) %{ 7970 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 7971 match(Set dst (MulVS src (LoadVector mem))); 7972 format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} 7973 ins_encode %{ 7974 int vector_len = 0; 7975 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7976 %} 7977 ins_pipe( pipe_slow ); 7978 %} 7979 7980 instruct vmul4S_mem_evex_special(vecD dst, vecD src, memory mem) %{ 7981 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 7982 match(Set dst (MulVS dst (LoadVector mem))); 7983 effect(TEMP src); 7984 format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} 7985 ins_encode %{ 7986 int vector_len = 0; 7987 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7988 %} 7989 ins_pipe( pipe_slow ); 7990 %} 7991 7992 instruct vmul8S(vecX dst, vecX src) %{ 7993 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 7994 match(Set dst (MulVS dst src)); 7995 format %{ "pmullw $dst,$src\t! mul packed8S" %} 7996 ins_encode %{ 7997 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 7998 %} 7999 ins_pipe( pipe_slow ); 8000 %} 8001 8002 instruct vmul8S_reg_avx(vecX dst, vecX src1, vecX src2) %{ 8003 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 8004 match(Set dst (MulVS src1 src2)); 8005 format %{ "vpmullw $dst,$src1,$src2\t! 
mul packed8S" %} 8006 ins_encode %{ 8007 int vector_len = 0; 8008 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8009 %} 8010 ins_pipe( pipe_slow ); 8011 %} 8012 8013 instruct vmul8S_reg_evex(vecX dst, vecX src1, vecX src2) %{ 8014 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 8015 match(Set dst (MulVS src1 src2)); 8016 format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} 8017 ins_encode %{ 8018 int vector_len = 0; 8019 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8020 %} 8021 ins_pipe( pipe_slow ); 8022 %} 8023 8024 instruct vmul8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ 8025 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 8026 match(Set dst (MulVS dst src2)); 8027 effect(TEMP src1); 8028 format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} 8029 ins_encode %{ 8030 int vector_len = 0; 8031 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8032 %} 8033 ins_pipe( pipe_slow ); 8034 %} 8035 8036 instruct vmul8S_mem_avx(vecX dst, vecX src, memory mem) %{ 8037 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 8038 match(Set dst (MulVS src (LoadVector mem))); 8039 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} 8040 ins_encode %{ 8041 int vector_len = 0; 8042 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8043 %} 8044 ins_pipe( pipe_slow ); 8045 %} 8046 8047 instruct vmul8S_mem_evex(vecX dst, vecX src, memory mem) %{ 8048 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 8049 match(Set dst (MulVS src (LoadVector mem))); 8050 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} 8051 ins_encode %{ 8052 int vector_len = 0; 8053 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8054 %} 8055 ins_pipe( pipe_slow ); 8056 %} 8057 8058 instruct vmul8S_mem_evex_special(vecX dst, vecX src, memory mem) %{ 8059 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 8060 match(Set dst (MulVS dst (LoadVector mem))); 8061 effect(TEMP src); 8062 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} 8063 ins_encode %{ 8064 int vector_len = 0; 8065 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8066 %} 8067 ins_pipe( pipe_slow ); 8068 %} 8069 8070 instruct vmul16S_reg_avx(vecY dst, vecY src1, vecY src2) %{ 8071 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 8072 match(Set dst (MulVS src1 src2)); 8073 format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} 8074 ins_encode %{ 8075 int vector_len = 1; 8076 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8077 %} 8078 ins_pipe( pipe_slow ); 8079 %} 8080 8081 instruct vmul16S_reg_evex(vecY dst, vecY src1, vecY src2) %{ 8082 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 8083 match(Set dst (MulVS src1 src2)); 8084 format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} 8085 ins_encode %{ 8086 int vector_len = 1; 8087 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8088 %} 8089 ins_pipe( pipe_slow ); 8090 %} 8091 8092 instruct vmul16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ 8093 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 8094 match(Set dst (MulVS dst src2)); 8095 effect(TEMP src1); 8096 format %{ "vpmullw $dst,$src1,$src2\t! 
mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
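// The integer multiply rules that follow need newer ISA support than the
// word multiplies above: packed 32-bit multiply (pmulld/vpmulld) first
// appeared in SSE4.1, hence the UseSSE > 3 predicates on the non-AVX forms,
// and a packed 64-bit multiply (vpmullq) exists only with AVX512DQ, hence
// the supports_avx512dq() terms on all MulVL rules. In each case only the
// low half of every product is kept, matching Java's wrapping int/long
// multiplication, so a loop such as
//   for (int i = 0; i < n; i++) c[i] = a[i] * b[i];
// can vectorize to MulVI -> pmulld/vpmulld without changing semantics.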
// Integers vector mul (sse4_1)
instruct vmul2I(vecD dst, vecD src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed2I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I(vecX dst, vecX src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed4I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t!
mul packed8L" %} 8273 ins_encode %{ 8274 int vector_len = 2; 8275 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8276 %} 8277 ins_pipe( pipe_slow ); 8278 %} 8279 8280 instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{ 8281 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq()); 8282 match(Set dst (MulVL src (LoadVector mem))); 8283 format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %} 8284 ins_encode %{ 8285 int vector_len = 2; 8286 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8287 %} 8288 ins_pipe( pipe_slow ); 8289 %} 8290 8291 instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{ 8292 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 8293 match(Set dst (MulVI src1 src2)); 8294 format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %} 8295 ins_encode %{ 8296 int vector_len = 1; 8297 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8298 %} 8299 ins_pipe( pipe_slow ); 8300 %} 8301 8302 instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{ 8303 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 8304 match(Set dst (MulVI src (LoadVector mem))); 8305 format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %} 8306 ins_encode %{ 8307 int vector_len = 1; 8308 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8309 %} 8310 ins_pipe( pipe_slow ); 8311 %} 8312 8313 instruct vmul16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ 8314 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8315 match(Set dst (MulVI src1 src2)); 8316 format %{ "vpmulld $dst,$src1,$src2\t! mul packed16I" %} 8317 ins_encode %{ 8318 int vector_len = 2; 8319 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8320 %} 8321 ins_pipe( pipe_slow ); 8322 %} 8323 8324 instruct vmul16I_mem(vecZ dst, vecZ src, memory mem) %{ 8325 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8326 match(Set dst (MulVI src (LoadVector mem))); 8327 format %{ "vpmulld $dst,$src,$mem\t! mul packed16I" %} 8328 ins_encode %{ 8329 int vector_len = 2; 8330 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8331 %} 8332 ins_pipe( pipe_slow ); 8333 %} 8334 8335 // Floats vector mul 8336 instruct vmul2F(vecD dst, vecD src) %{ 8337 predicate(n->as_Vector()->length() == 2); 8338 match(Set dst (MulVF dst src)); 8339 format %{ "mulps $dst,$src\t! mul packed2F" %} 8340 ins_encode %{ 8341 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 8342 %} 8343 ins_pipe( pipe_slow ); 8344 %} 8345 8346 instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{ 8347 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8348 match(Set dst (MulVF src1 src2)); 8349 format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %} 8350 ins_encode %{ 8351 int vector_len = 0; 8352 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8353 %} 8354 ins_pipe( pipe_slow ); 8355 %} 8356 8357 instruct vmul2F_mem(vecD dst, vecD src, memory mem) %{ 8358 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8359 match(Set dst (MulVF src (LoadVector mem))); 8360 format %{ "vmulps $dst,$src,$mem\t! mul packed2F" %} 8361 ins_encode %{ 8362 int vector_len = 0; 8363 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8364 %} 8365 ins_pipe( pipe_slow ); 8366 %} 8367 8368 instruct vmul4F(vecX dst, vecX src) %{ 8369 predicate(n->as_Vector()->length() == 4); 8370 match(Set dst (MulVF dst src)); 8371 format %{ "mulps $dst,$src\t! 
mul packed4F" %} 8372 ins_encode %{ 8373 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 8374 %} 8375 ins_pipe( pipe_slow ); 8376 %} 8377 8378 instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{ 8379 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8380 match(Set dst (MulVF src1 src2)); 8381 format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %} 8382 ins_encode %{ 8383 int vector_len = 0; 8384 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8385 %} 8386 ins_pipe( pipe_slow ); 8387 %} 8388 8389 instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{ 8390 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8391 match(Set dst (MulVF src (LoadVector mem))); 8392 format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %} 8393 ins_encode %{ 8394 int vector_len = 0; 8395 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8396 %} 8397 ins_pipe( pipe_slow ); 8398 %} 8399 8400 instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{ 8401 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8402 match(Set dst (MulVF src1 src2)); 8403 format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %} 8404 ins_encode %{ 8405 int vector_len = 1; 8406 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8407 %} 8408 ins_pipe( pipe_slow ); 8409 %} 8410 8411 instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{ 8412 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8413 match(Set dst (MulVF src (LoadVector mem))); 8414 format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %} 8415 ins_encode %{ 8416 int vector_len = 1; 8417 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8418 %} 8419 ins_pipe( pipe_slow ); 8420 %} 8421 8422 instruct vmul16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ 8423 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8424 match(Set dst (MulVF src1 src2)); 8425 format %{ "vmulps $dst,$src1,$src2\t! mul packed16F" %} 8426 ins_encode %{ 8427 int vector_len = 2; 8428 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8429 %} 8430 ins_pipe( pipe_slow ); 8431 %} 8432 8433 instruct vmul16F_mem(vecZ dst, vecZ src, memory mem) %{ 8434 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8435 match(Set dst (MulVF src (LoadVector mem))); 8436 format %{ "vmulps $dst,$src,$mem\t! mul packed16F" %} 8437 ins_encode %{ 8438 int vector_len = 2; 8439 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8440 %} 8441 ins_pipe( pipe_slow ); 8442 %} 8443 8444 // Doubles vector mul 8445 instruct vmul2D(vecX dst, vecX src) %{ 8446 predicate(n->as_Vector()->length() == 2); 8447 match(Set dst (MulVD dst src)); 8448 format %{ "mulpd $dst,$src\t! mul packed2D" %} 8449 ins_encode %{ 8450 __ mulpd($dst$$XMMRegister, $src$$XMMRegister); 8451 %} 8452 ins_pipe( pipe_slow ); 8453 %} 8454 8455 instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{ 8456 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8457 match(Set dst (MulVD src1 src2)); 8458 format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %} 8459 ins_encode %{ 8460 int vector_len = 0; 8461 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8462 %} 8463 ins_pipe( pipe_slow ); 8464 %} 8465 8466 instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{ 8467 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8468 match(Set dst (MulVD src (LoadVector mem))); 8469 format %{ "vmulpd $dst,$src,$mem\t! 
mul packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmov4D_reg(vecY dst, vecY src1, vecY src2, immI8 cop, cmpOp_vcmppd copnd) %{
  predicate(UseAVX > 0 && UseAVX < 3 && n->as_Vector()->length() == 4);
  match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2)));
  effect(TEMP dst, USE src1, USE src2);
  format %{ "cmppd.$copnd $dst, $src1, $src2 ! vcmovevd, cond=$cop\n\t"
            "vblendvpd $dst,$src1,$src2,$dst ! vcmovevd\n\t"
         %}
  ins_encode %{
    int vector_len = 1;
    int cond = (Assembler::Condition)($copnd$$cmpcode);
    __ cmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len);
    __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
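// Note: CMoveVD is lowered as a compare-and-blend pair rather than a single
// instruction: cmppd writes an all-ones/all-zeros mask into dst for every
// lane satisfying the condition, then vblendvpd uses that mask to pick src2
// for lanes that compared true and src1 otherwise. Roughly, per lane:
//   dst[i] = (src1[i] cond src2[i]) ? src2[i] : src1[i]
// dst must be TEMP because the intermediate mask overwrites it while src1
// and src2 are still live. The UseAVX < 3 term keeps this VEX-encoded
// sequence off AVX-512, where vcmppd targets opmask registers instead of
// an XMM/YMM destination.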
// --------------------------------- DIV --------------------------------------

// Floats vector div
instruct vdiv2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed2F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed4F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
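// Note on the vector_len argument used throughout these encodings: 0 selects
// 128-bit (XMM), 1 selects 256-bit (YMM) and 2 selects 512-bit (ZMM)
// operation. Rules on vecZ operands (16 floats / 8 doubles), such as
// vdiv16F_reg above, therefore require UseAVX > 2, i.e. EVEX encoding,
// while the 128/256-bit VEX forms need only UseAVX > 0.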

// Doubles vector div
instruct vdiv2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD dst src));
  format %{ "divpd $dst,$src\t! div packed2D" %}
  ins_encode %{
    __ divpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ Shift ---------------------------------------

// Left and right shift count vectors are the same on x86
// (only the lowest bits of the xmm register are used for the count).
instruct vshiftcnt(vecS dst, rRegI cnt) %{
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "movd $dst,$cnt\t! load shift count" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $cnt$$Register);
  %}
  ins_pipe( pipe_slow );
%}
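
// Why a single movd suffices (illustrative Java, not taken from this file):
// when C2 vectorizes a shift, every lane shares one scalar count, e.g.
//
//   for (int i = 0; i < a.length; i++) {
//     a[i] = a[i] << n;   // the same 'n' applies to every lane
//   }
//
// so the count is loaded once into an xmm register, and the packed shift
// instructions read it from that register's low bits, for either direction.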

// --------------------------------- Sqrt -------------------------------------

// Floating point vector sqrt - double precision only
instruct vsqrt2D_reg(vecX dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt2D_mem(vecX dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4D_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4D_mem(vecY dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8D_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8D_mem(vecZ dst, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ LeftShift -----------------------------------

// Shorts/Chars vector left shift
instruct vsll2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}
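
// The register variants below follow a pattern that repeats for every
// short-vector shift in this file: a *_avx form for AVX1/2-only CPUs
// (VM_Version::supports_avxonly()), a *_evex form for AVX-512 CPUs with the
// BW extension (supports_avx512bw()), and a *_evex_special form for AVX-512
// CPUs without BW (supports_avx512nobw()). The three predicates are mutually
// exclusive, so exactly one variant can match on any given CPU. The *_imm
// forms encode a compile-time constant count (immI8) in the instruction
// itself instead of reading it from an xmm register.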

instruct vsll2S_reg_avx(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_evex(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_avx(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_evex(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_avx(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_evex(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_avx(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_evex(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
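
// Note that the packed32S forms above come only in avx512bw flavors: a
// 32-short vector is 512 bits wide, and the 512-bit encodings of the word
// shifts (vpsllw and friends) require the AVX-512 BW extension, so there is
// nothing here for plain-AVX or BW-less AVX-512 CPUs to match.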

// Integers vector left shift
instruct vsll2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector left shift
instruct vsll2L(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8L_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ----------------------- LogicalRightShift -----------------------------------

// Shorts vector logical right shift produces an incorrect Java result for
// negative data, because Java code converts short values into ints with sign
// extension before a shift. But char vectors are fine since chars are
// unsigned values.
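
// A worked example of that caveat, in plain Java (my own illustration, not
// from this file):
//
//   short s = -8;                  // bit pattern 0xFFF8
//   short r = (short)(s >>> 2);
//   // s promotes to int 0xFFFFFFF8, shifts to 0x3FFFFFFE,
//   // and narrows back to 0xFFFE, i.e. r == -2.
//
// A 16-bit packed logical shift (psrlw) would instead compute
// 0xFFF8 >>> 2 == 0x3FFE (16382) in that lane, so URShiftVS can only be
// matched where this mismatch cannot occur. For a char the promotion is
// zero-extending, so psrlw and the Java result agree.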

instruct vsrl2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_avx(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_evex(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_avx(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_evex(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_avx(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_evex(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_avx(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_evex(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector logical right shift
instruct vsrl2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector logical right shift
instruct vsrl2L(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8L_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------- ArithmeticRightShift -----------------------------------

// Shorts/Chars vector arithmetic right shift
instruct vsra2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}
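
// Unlike the logical case above, the arithmetic right shift survives Java's
// int promotion for shorts. A worked example (my own illustration):
//
//   short s = -8;                 // 0xFFF8
//   short r = (short)(s >> 2);
//   // promotion gives 0xFFFFFFF8, >> 2 gives 0xFFFFFFFE, and narrowing
//   // gives 0xFFFE, i.e. r == -2 -- exactly what a 16-bit psraw lane
//   // computes (0xFFF8 >> 2 == 0xFFFE).
//
// That is why RShiftVS can be matched directly here.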

instruct vsra2S_reg_avx(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_evex(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_avx(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_evex(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_avx(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_evex(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
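
// A note on psraw/vpsraw semantics: each 16-bit lane is shifted right with
// sign replication, and a shift count greater than 15 fills the lane with
// copies of its sign bit. For example, on four of the lanes:
//   [-32768, 6, -6, 1] >>a 1   =>  [-16384, 3, -3, 0]
//   [-32768, 6, -6, 1] >>a 16  =>  [    -1, 0, -1, 0]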

instruct vsra16S_reg_avx(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_evex(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
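
// For reference, the vector_len argument passed to the assembler selects
// the encoded vector width: 0 = 128-bit (XMM), 1 = 256-bit (YMM),
// 2 = 512-bit (ZMM). That matches the operand classes used in these rules:
// vecS/vecD/vecX forms pass 0, vecY forms pass 1, and vecZ forms pass 2.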

// Integer vector arithmetic right shift
instruct vsra2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
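
// psrad/vpsrad likewise shift each 32-bit lane right with sign fill, which
// is exactly Java's >> on int. For example:
//   [-8, 16, -1, 5] >>a 3  =>  [-1, 2, -1, 0]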

instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// There are no vector arithmetic right shift instructions for longs.
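// (SSE/AVX provide psraw and psrad but no quadword form; a 64-bit
// arithmetic right shift, vpsraq, only exists with the EVEX encodings
// introduced by AVX-512, and no rule for it is defined here.)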

// --------------------------------- AND --------------------------------------

instruct vand4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (4 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (8 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (16 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
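
// Note on the pattern above, repeated for OR and XOR below: the SSE form
// (pand) is destructive, computing dst &= src, so it matches
// (Set dst (AndV dst src)); the AVX form (vpand) is three-operand and
// non-destructive, so it can match two independent inputs,
// (Set dst (AndV src1 src2)).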

// --------------------------------- OR ---------------------------------------

instruct vor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (4 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (8 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (16 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
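
// The *_mem variants fold the second operand's load into the instruction
// itself: matching (OrV src (LoadVector mem)) lets the matcher emit a
// single vpor with a memory source operand instead of a separate vector
// load followed by a register-register vpor. The AND and XOR sections use
// the same trick.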

// --------------------------------- XOR --------------------------------------

instruct vxor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
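
// A note in passing: xor-ing a register with itself (pxor xmm,xmm) is the
// canonical x86 idiom for zeroing a vector register; the rules above only
// match explicit XorV nodes, with the rule chosen keyed to the vector's
// length in bytes and the available AVX level.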