//
// Copyright (c) 2011, 2016, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// The Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers. 512-bit registers, or 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre-EVEX enabled architectures:
//   XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX enabled architectures:
//   XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
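// As a reading aid (illustrative only, not an additional definition): in the
// first declaration below,
//   reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
// word (a) of xmm0 is declared save-on-call for both the allocator and the
// C calling convention, is spilled as a Float (Op_RegF), and carries the
// hardware encoding 0. The companion XMM0b..XMM0p definitions bind the
// remaining fifteen 32-bit words via ->next(1)..->next(15), so the allocator
// can reason about the full 512-bit register one word at a time.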
//
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM31 preserved across function calls
//              XMM0-XMM3 might hold parameters

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

#ifdef _WIN64

reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(15));

reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(15));

#else // _WIN64

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

#ifdef _LP64

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
xmm23->as_VMReg()->next(8)); 924 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9)); 925 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10)); 926 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11)); 927 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12)); 928 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13)); 929 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14)); 930 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15)); 931 932 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()); 933 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1)); 934 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2)); 935 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3)); 936 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4)); 937 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5)); 938 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6)); 939 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7)); 940 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8)); 941 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9)); 942 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10)); 943 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11)); 944 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12)); 945 reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13)); 946 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14)); 947 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15)); 948 949 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()); 950 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1)); 951 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2)); 952 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3)); 953 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4)); 954 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5)); 955 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6)); 956 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7)); 957 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8)); 958 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9)); 959 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10)); 960 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11)); 961 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12)); 962 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13)); 963 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14)); 964 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15)); 965 966 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()); 967 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1)); 968 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2)); 969 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3)); 970 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4)); 971 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5)); 972 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6)); 973 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7)); 974 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8)); 975 reg_def XMM26j( SOC, SOC, Op_RegF, 26, 
xmm26->as_VMReg()->next(9)); 976 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10)); 977 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11)); 978 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12)); 979 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13)); 980 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14)); 981 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15)); 982 983 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()); 984 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1)); 985 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2)); 986 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3)); 987 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4)); 988 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5)); 989 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6)); 990 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7)); 991 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8)); 992 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9)); 993 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10)); 994 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11)); 995 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12)); 996 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13)); 997 reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14)); 998 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15)); 999 1000 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()); 1001 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1)); 1002 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2)); 1003 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3)); 1004 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4)); 1005 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5)); 1006 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6)); 1007 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7)); 1008 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8)); 1009 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9)); 1010 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10)); 1011 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11)); 1012 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12)); 1013 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13)); 1014 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14)); 1015 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15)); 1016 1017 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()); 1018 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1)); 1019 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2)); 1020 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3)); 1021 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4)); 1022 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5)); 1023 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6)); 1024 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7)); 1025 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8)); 1026 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9)); 1027 reg_def XMM29k( SOC, SOC, Op_RegF, 
29, xmm29->as_VMReg()->next(10)); 1028 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11)); 1029 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12)); 1030 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13)); 1031 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14)); 1032 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15)); 1033 1034 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()); 1035 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1)); 1036 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2)); 1037 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3)); 1038 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4)); 1039 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5)); 1040 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6)); 1041 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7)); 1042 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8)); 1043 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9)); 1044 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10)); 1045 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11)); 1046 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12)); 1047 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13)); 1048 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14)); 1049 reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15)); 1050 1051 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()); 1052 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1)); 1053 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2)); 1054 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3)); 1055 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4)); 1056 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5)); 1057 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6)); 1058 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7)); 1059 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8)); 1060 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9)); 1061 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10)); 1062 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11)); 1063 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12)); 1064 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13)); 1065 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14)); 1066 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15)); 1067 1068 #endif // _LP64 1069 1070 #endif // _WIN64 1071 1072 #ifdef _LP64 1073 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad()); 1074 #else 1075 reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad()); 1076 #endif // _LP64 1077 1078 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1079 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1080 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1081 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1082 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, 
XMM4o, XMM4p, 1083 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1084 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1085 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1086 #ifdef _LP64 1087 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1088 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1089 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1090 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1091 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1092 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1093 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1094 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1095 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 1096 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 1097 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 1098 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 1099 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 1100 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 1101 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 1102 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 1103 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 1104 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 1105 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 1106 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 1107 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 1108 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 1109 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1110 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1111 #endif 1112 ); 1113 1114 // flags 
allocation class should be last. 1115 alloc_class chunk2(RFLAGS); 1116 1117 // Singleton class for condition codes 1118 reg_class int_flags(RFLAGS); 1119 1120 // Class for pre evex float registers 1121 reg_class float_reg_legacy(XMM0, 1122 XMM1, 1123 XMM2, 1124 XMM3, 1125 XMM4, 1126 XMM5, 1127 XMM6, 1128 XMM7 1129 #ifdef _LP64 1130 ,XMM8, 1131 XMM9, 1132 XMM10, 1133 XMM11, 1134 XMM12, 1135 XMM13, 1136 XMM14, 1137 XMM15 1138 #endif 1139 ); 1140 1141 // Class for evex float registers 1142 reg_class float_reg_evex(XMM0, 1143 XMM1, 1144 XMM2, 1145 XMM3, 1146 XMM4, 1147 XMM5, 1148 XMM6, 1149 XMM7 1150 #ifdef _LP64 1151 ,XMM8, 1152 XMM9, 1153 XMM10, 1154 XMM11, 1155 XMM12, 1156 XMM13, 1157 XMM14, 1158 XMM15, 1159 XMM16, 1160 XMM17, 1161 XMM18, 1162 XMM19, 1163 XMM20, 1164 XMM21, 1165 XMM22, 1166 XMM23, 1167 XMM24, 1168 XMM25, 1169 XMM26, 1170 XMM27, 1171 XMM28, 1172 XMM29, 1173 XMM30, 1174 XMM31 1175 #endif 1176 ); 1177 1178 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} ); 1179 1180 // Class for pre evex double registers 1181 reg_class double_reg_legacy(XMM0, XMM0b, 1182 XMM1, XMM1b, 1183 XMM2, XMM2b, 1184 XMM3, XMM3b, 1185 XMM4, XMM4b, 1186 XMM5, XMM5b, 1187 XMM6, XMM6b, 1188 XMM7, XMM7b 1189 #ifdef _LP64 1190 ,XMM8, XMM8b, 1191 XMM9, XMM9b, 1192 XMM10, XMM10b, 1193 XMM11, XMM11b, 1194 XMM12, XMM12b, 1195 XMM13, XMM13b, 1196 XMM14, XMM14b, 1197 XMM15, XMM15b 1198 #endif 1199 ); 1200 1201 // Class for evex double registers 1202 reg_class double_reg_evex(XMM0, XMM0b, 1203 XMM1, XMM1b, 1204 XMM2, XMM2b, 1205 XMM3, XMM3b, 1206 XMM4, XMM4b, 1207 XMM5, XMM5b, 1208 XMM6, XMM6b, 1209 XMM7, XMM7b 1210 #ifdef _LP64 1211 ,XMM8, XMM8b, 1212 XMM9, XMM9b, 1213 XMM10, XMM10b, 1214 XMM11, XMM11b, 1215 XMM12, XMM12b, 1216 XMM13, XMM13b, 1217 XMM14, XMM14b, 1218 XMM15, XMM15b, 1219 XMM16, XMM16b, 1220 XMM17, XMM17b, 1221 XMM18, XMM18b, 1222 XMM19, XMM19b, 1223 XMM20, XMM20b, 1224 XMM21, XMM21b, 1225 XMM22, XMM22b, 1226 XMM23, XMM23b, 1227 XMM24, XMM24b, 1228 XMM25, XMM25b, 1229 XMM26, XMM26b, 1230 XMM27, XMM27b, 1231 XMM28, XMM28b, 1232 XMM29, XMM29b, 1233 XMM30, XMM30b, 1234 XMM31, XMM31b 1235 #endif 1236 ); 1237 1238 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} ); 1239 1240 // Class for pre evex 32bit vector registers 1241 reg_class vectors_reg_legacy(XMM0, 1242 XMM1, 1243 XMM2, 1244 XMM3, 1245 XMM4, 1246 XMM5, 1247 XMM6, 1248 XMM7 1249 #ifdef _LP64 1250 ,XMM8, 1251 XMM9, 1252 XMM10, 1253 XMM11, 1254 XMM12, 1255 XMM13, 1256 XMM14, 1257 XMM15 1258 #endif 1259 ); 1260 1261 // Class for evex 32bit vector registers 1262 reg_class vectors_reg_evex(XMM0, 1263 XMM1, 1264 XMM2, 1265 XMM3, 1266 XMM4, 1267 XMM5, 1268 XMM6, 1269 XMM7 1270 #ifdef _LP64 1271 ,XMM8, 1272 XMM9, 1273 XMM10, 1274 XMM11, 1275 XMM12, 1276 XMM13, 1277 XMM14, 1278 XMM15, 1279 XMM16, 1280 XMM17, 1281 XMM18, 1282 XMM19, 1283 XMM20, 1284 XMM21, 1285 XMM22, 1286 XMM23, 1287 XMM24, 1288 XMM25, 1289 XMM26, 1290 XMM27, 1291 XMM28, 1292 XMM29, 1293 XMM30, 1294 XMM31 1295 #endif 1296 ); 1297 1298 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} ); 1299 1300 // Class for all 64bit vector registers 1301 reg_class vectord_reg_legacy(XMM0, XMM0b, 1302 XMM1, XMM1b, 1303 XMM2, XMM2b, 1304 XMM3, XMM3b, 1305 XMM4, XMM4b, 1306 XMM5, XMM5b, 1307 XMM6, XMM6b, 1308 XMM7, XMM7b 1309 #ifdef _LP64 1310 ,XMM8, XMM8b, 1311 XMM9, XMM9b, 1312 XMM10, XMM10b, 1313 XMM11, XMM11b, 1314 XMM12, XMM12b, 1315 XMM13, XMM13b, 1316 
XMM14, XMM14b, 1317 XMM15, XMM15b 1318 #endif 1319 ); 1320 1321 // Class for all 64bit vector registers 1322 reg_class vectord_reg_evex(XMM0, XMM0b, 1323 XMM1, XMM1b, 1324 XMM2, XMM2b, 1325 XMM3, XMM3b, 1326 XMM4, XMM4b, 1327 XMM5, XMM5b, 1328 XMM6, XMM6b, 1329 XMM7, XMM7b 1330 #ifdef _LP64 1331 ,XMM8, XMM8b, 1332 XMM9, XMM9b, 1333 XMM10, XMM10b, 1334 XMM11, XMM11b, 1335 XMM12, XMM12b, 1336 XMM13, XMM13b, 1337 XMM14, XMM14b, 1338 XMM15, XMM15b, 1339 XMM16, XMM16b, 1340 XMM17, XMM17b, 1341 XMM18, XMM18b, 1342 XMM19, XMM19b, 1343 XMM20, XMM20b, 1344 XMM21, XMM21b, 1345 XMM22, XMM22b, 1346 XMM23, XMM23b, 1347 XMM24, XMM24b, 1348 XMM25, XMM25b, 1349 XMM26, XMM26b, 1350 XMM27, XMM27b, 1351 XMM28, XMM28b, 1352 XMM29, XMM29b, 1353 XMM30, XMM30b, 1354 XMM31, XMM31b 1355 #endif 1356 ); 1357 1358 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} ); 1359 1360 // Class for all 128bit vector registers 1361 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, 1362 XMM1, XMM1b, XMM1c, XMM1d, 1363 XMM2, XMM2b, XMM2c, XMM2d, 1364 XMM3, XMM3b, XMM3c, XMM3d, 1365 XMM4, XMM4b, XMM4c, XMM4d, 1366 XMM5, XMM5b, XMM5c, XMM5d, 1367 XMM6, XMM6b, XMM6c, XMM6d, 1368 XMM7, XMM7b, XMM7c, XMM7d 1369 #ifdef _LP64 1370 ,XMM8, XMM8b, XMM8c, XMM8d, 1371 XMM9, XMM9b, XMM9c, XMM9d, 1372 XMM10, XMM10b, XMM10c, XMM10d, 1373 XMM11, XMM11b, XMM11c, XMM11d, 1374 XMM12, XMM12b, XMM12c, XMM12d, 1375 XMM13, XMM13b, XMM13c, XMM13d, 1376 XMM14, XMM14b, XMM14c, XMM14d, 1377 XMM15, XMM15b, XMM15c, XMM15d 1378 #endif 1379 ); 1380 1381 // Class for all 128bit vector registers 1382 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, 1383 XMM1, XMM1b, XMM1c, XMM1d, 1384 XMM2, XMM2b, XMM2c, XMM2d, 1385 XMM3, XMM3b, XMM3c, XMM3d, 1386 XMM4, XMM4b, XMM4c, XMM4d, 1387 XMM5, XMM5b, XMM5c, XMM5d, 1388 XMM6, XMM6b, XMM6c, XMM6d, 1389 XMM7, XMM7b, XMM7c, XMM7d 1390 #ifdef _LP64 1391 ,XMM8, XMM8b, XMM8c, XMM8d, 1392 XMM9, XMM9b, XMM9c, XMM9d, 1393 XMM10, XMM10b, XMM10c, XMM10d, 1394 XMM11, XMM11b, XMM11c, XMM11d, 1395 XMM12, XMM12b, XMM12c, XMM12d, 1396 XMM13, XMM13b, XMM13c, XMM13d, 1397 XMM14, XMM14b, XMM14c, XMM14d, 1398 XMM15, XMM15b, XMM15c, XMM15d, 1399 XMM16, XMM16b, XMM16c, XMM16d, 1400 XMM17, XMM17b, XMM17c, XMM17d, 1401 XMM18, XMM18b, XMM18c, XMM18d, 1402 XMM19, XMM19b, XMM19c, XMM19d, 1403 XMM20, XMM20b, XMM20c, XMM20d, 1404 XMM21, XMM21b, XMM21c, XMM21d, 1405 XMM22, XMM22b, XMM22c, XMM22d, 1406 XMM23, XMM23b, XMM23c, XMM23d, 1407 XMM24, XMM24b, XMM24c, XMM24d, 1408 XMM25, XMM25b, XMM25c, XMM25d, 1409 XMM26, XMM26b, XMM26c, XMM26d, 1410 XMM27, XMM27b, XMM27c, XMM27d, 1411 XMM28, XMM28b, XMM28c, XMM28d, 1412 XMM29, XMM29b, XMM29c, XMM29d, 1413 XMM30, XMM30b, XMM30c, XMM30d, 1414 XMM31, XMM31b, XMM31c, XMM31d 1415 #endif 1416 ); 1417 1418 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} ); 1419 1420 // Class for all 256bit vector registers 1421 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1422 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1423 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1424 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 1425 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1426 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1427 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1428 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1429 #ifdef _LP64 1430 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1431 XMM9, XMM9b, 
XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1432 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1433 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1434 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1435 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1436 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1437 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h 1438 #endif 1439 ); 1440 1441 // Class for all 256bit vector registers 1442 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1443 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1444 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1445 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 1446 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1447 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1448 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1449 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1450 #ifdef _LP64 1451 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1452 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1453 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1454 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1455 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1456 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1457 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1458 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, 1459 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, 1460 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, 1461 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, 1462 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, 1463 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, 1464 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, 1465 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, 1466 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, 1467 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, 1468 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, 1469 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, 1470 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, 1471 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, 1472 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, 1473 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, 1474 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h 1475 #endif 1476 ); 1477 1478 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} ); 1479 1480 // Class for all 512bit vector registers 1481 reg_class vectorz_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1482 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1483 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1484 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1485 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1486 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, 
XMM5o, XMM5p, 1487 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1488 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1489 #ifdef _LP64 1490 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1491 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1492 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1493 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1494 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1495 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1496 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1497 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1498 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 1499 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 1500 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 1501 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 1502 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 1503 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 1504 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 1505 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 1506 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 1507 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 1508 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 1509 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 1510 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 1511 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 1512 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1513 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1514 #endif 1515 ); 1516 1517 %} 1518 1519 1520 //----------SOURCE BLOCK------------------------------------------------------- 1521 // This is a block of 
C++ code which provides values, functions, and
1522 // definitions necessary in the rest of the architecture description
1523
1524 source_hpp %{
1525 // Header information of the source block.
1526 // Method declarations/definitions which are used outside
1527 // the ad-scope can conveniently be defined here.
1528 //
1529 // To keep related declarations/definitions/uses close together,
1530 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
1531
1532 class NativeJump;
1533
1534 class CallStubImpl {
1535
1536 //--------------------------------------------------------------
1537 //---< Used for optimization in Compile::shorten_branches >---
1538 //--------------------------------------------------------------
1539
1540 public:
1541 // Size of call trampoline stub.
1542 static uint size_call_trampoline() {
1543 return 0; // no call trampolines on this platform
1544 }
1545
1546 // number of relocations needed by a call trampoline stub
1547 static uint reloc_call_trampoline() {
1548 return 0; // no call trampolines on this platform
1549 }
1550 };
1551
1552 class HandlerImpl {
1553
1554 public:
1555
1556 static int emit_exception_handler(CodeBuffer &cbuf);
1557 static int emit_deopt_handler(CodeBuffer& cbuf);
1558
1559 static uint size_exception_handler() {
1560 // NativeCall instruction size is the same as NativeJump.
1561 // The exception handler starts out as a jump and can be patched to
1562 // a call by deoptimization. (4932387)
1563 // Note that this value is also credited (in output.cpp) to
1564 // the size of the code section.
1565 return NativeJump::instruction_size;
1566 }
1567
1568 #ifdef _LP64
1569 static uint size_deopt_handler() {
1570 // three 5 byte instructions
1571 return 15;
1572 }
1573 #else
1574 static uint size_deopt_handler() {
1575 // NativeCall instruction size is the same as NativeJump.
1576 // The exception handler starts out as a jump and can be patched to
1577 // a call by deoptimization. (4932387)
1578 // Note that this value is also credited (in output.cpp) to
1579 // the size of the code section.
1580 return 5 + NativeJump::instruction_size; // pushl(); jmp;
1581 }
1582 #endif
1583 };
1584
1585 %} // end source_hpp
1586
1587 source %{
1588
1589 #include "opto/addnode.hpp"
1590
1591 // Emit exception handler code.
1592 // Stuff framesize into a register and call a VM stub routine.
1593 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {
1594
1595 // Note that the code buffer's insts_mark is always relative to insts.
1596 // That's why we must use the macroassembler to generate a handler.
1597 MacroAssembler _masm(&cbuf);
1598 address base = __ start_a_stub(size_exception_handler());
1599 if (base == NULL) {
1600 ciEnv::current()->record_failure("CodeCache is full");
1601 return 0; // CodeBuffer::expand failed
1602 }
1603 int offset = __ offset();
1604 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
1605 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
1606 __ end_a_stub();
1607 return offset;
1608 }
1609
1610 // Emit deopt handler code.
1611 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
1612
1613 // Note that the code buffer's insts_mark is always relative to insts.
1614 // That's why we must use the macroassembler to generate a handler.
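// Overview of the two paths below: each pushes "the_pc" (the entry pc of
// this handler) on the stack before jumping to the unpack blob, which has
// the same effect as a call made from "the_pc" itself. The 64-bit variant
// must do this without clobbering any register, as all of them may hold
// live values at a deoptimization point.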
1615 MacroAssembler _masm(&cbuf); 1616 address base = __ start_a_stub(size_deopt_handler()); 1617 if (base == NULL) { 1618 ciEnv::current()->record_failure("CodeCache is full"); 1619 return 0; // CodeBuffer::expand failed 1620 } 1621 int offset = __ offset(); 1622 1623 #ifdef _LP64 1624 address the_pc = (address) __ pc(); 1625 Label next; 1626 // push a "the_pc" on the stack without destroying any registers 1627 // as they all may be live. 1628 1629 // push address of "next" 1630 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32 1631 __ bind(next); 1632 // adjust it so it matches "the_pc" 1633 __ subptr(Address(rsp, 0), __ offset() - offset); 1634 #else 1635 InternalAddress here(__ pc()); 1636 __ pushptr(here.addr()); 1637 #endif 1638 1639 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); 1640 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); 1641 __ end_a_stub(); 1642 return offset; 1643 } 1644 1645 1646 //============================================================================= 1647 1648 // Float masks come from different places depending on platform. 1649 #ifdef _LP64 1650 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 1651 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 1652 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 1653 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 1654 #else 1655 static address float_signmask() { return (address)float_signmask_pool; } 1656 static address float_signflip() { return (address)float_signflip_pool; } 1657 static address double_signmask() { return (address)double_signmask_pool; } 1658 static address double_signflip() { return (address)double_signflip_pool; } 1659 #endif 1660 1661 1662 const bool Matcher::match_rule_supported(int opcode) { 1663 if (!has_match_rule(opcode)) 1664 return false; 1665 1666 bool ret_value = true; 1667 switch (opcode) { 1668 case Op_PopCountI: 1669 case Op_PopCountL: 1670 if (!UsePopCountInstruction) 1671 ret_value = false; 1672 break; 1673 case Op_MulVI: 1674 if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX 1675 ret_value = false; 1676 break; 1677 case Op_MulVL: 1678 case Op_MulReductionVL: 1679 if (VM_Version::supports_avx512dq() == false) 1680 ret_value = false; 1681 break; 1682 case Op_AddReductionVL: 1683 if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here 1684 ret_value = false; 1685 break; 1686 case Op_AddReductionVI: 1687 if (UseSSE < 3) // requires at least SSE3 1688 ret_value = false; 1689 break; 1690 case Op_MulReductionVI: 1691 if (UseSSE < 4) // requires at least SSE4 1692 ret_value = false; 1693 break; 1694 case Op_AddReductionVF: 1695 case Op_AddReductionVD: 1696 case Op_MulReductionVF: 1697 case Op_MulReductionVD: 1698 if (UseSSE < 1) // requires at least SSE 1699 ret_value = false; 1700 break; 1701 case Op_SqrtVD: 1702 if (UseAVX < 1) // enabled for AVX only 1703 ret_value = false; 1704 break; 1705 case Op_CompareAndSwapL: 1706 #ifdef _LP64 1707 case Op_CompareAndSwapP: 1708 #endif 1709 if (!VM_Version::supports_cx8()) 1710 ret_value = false; 1711 break; 1712 case Op_CMoveVD: 1713 if (UseAVX < 1 || UseAVX > 2) 1714 ret_value = false; 1715 break; 1716 case Op_StrIndexOf: 1717 if (!UseSSE42Intrinsics) 1718 ret_value = false; 1719 break; 1720 case Op_StrIndexOfChar: 1721 if (!UseSSE42Intrinsics) 1722 ret_value = false; 1723 break; 1724 case Op_OnSpinWait: 1725 if 
(VM_Version::supports_on_spin_wait() == false)
1726 ret_value = false;
1727 break;
1728 }
1729
1730 return ret_value; // By default, match rules are supported.
1731 }
1732
1733 const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
1734 // identify extra cases that we might want to provide match rules for
1735 // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
1736 bool ret_value = match_rule_supported(opcode);
1737 if (ret_value) {
1738 switch (opcode) {
1739 case Op_AddVB:
1740 case Op_SubVB:
1741 if ((vlen == 64) && (VM_Version::supports_avx512bw() == false))
1742 ret_value = false;
1743 break;
1744 case Op_URShiftVS:
1745 case Op_RShiftVS:
1746 case Op_LShiftVS:
1747 case Op_MulVS:
1748 case Op_AddVS:
1749 case Op_SubVS:
1750 if ((vlen == 32) && (VM_Version::supports_avx512bw() == false))
1751 ret_value = false;
1752 break;
1753 case Op_CMoveVD:
1754 if (vlen != 4)
1755 ret_value = false;
1756 break;
1757 }
1758 }
1759
1760 return ret_value; // By default, match rules are supported.
1761 }
1762
1763 const bool Matcher::has_predicated_vectors(void) {
1764 bool ret_value = false;
1765 if (UseAVX > 2) {
1766 ret_value = VM_Version::supports_avx512vl();
1767 }
1768
1769 return ret_value;
1770 }
1771
1772 const int Matcher::float_pressure(int default_pressure_threshold) {
1773 int float_pressure_threshold = default_pressure_threshold;
1774 #ifdef _LP64
1775 if (UseAVX > 2) {
1776 // Increase pressure threshold on machines with AVX3 which have
1777 // 2x more XMM registers.
1778 float_pressure_threshold = default_pressure_threshold * 2;
1779 }
1780 #endif
1781 return float_pressure_threshold;
1782 }
1783
1784 // Max vector size in bytes. 0 if not supported.
1785 const int Matcher::vector_width_in_bytes(BasicType bt) {
1786 assert(is_java_primitive(bt), "only primitive type vectors");
1787 if (UseSSE < 2) return 0;
1788 // SSE2 supports 128bit vectors for all types.
1789 // AVX2 supports 256bit vectors for all types.
1790 // EVEX (AVX-512) supports 512bit vectors for all types.
1791 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
1792 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
1793 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
1794 size = (UseAVX > 2) ? 64 : 32;
1795 // Use flag to limit vector size.
1796 size = MIN2(size,(int)MaxVectorSize);
1797 // Minimum 2 values in vector (or 4 for bytes).
1798 switch (bt) {
1799 case T_DOUBLE:
1800 case T_LONG:
1801 if (size < 16) return 0;
1802 break;
1803 case T_FLOAT:
1804 case T_INT:
1805 if (size < 8) return 0;
1806 break;
1807 case T_BOOLEAN:
1808 if (size < 4) return 0;
1809 break;
1810 case T_CHAR:
1811 if (size < 4) return 0;
1812 break;
1813 case T_BYTE:
1814 if (size < 4) return 0;
1815 break;
1816 case T_SHORT:
1817 if (size < 4) return 0;
1818 break;
1819 default:
1820 ShouldNotReachHere();
1821 }
1822 return size;
1823 }
1824
1825 // Limits on vector size (number of elements) loaded into vector.
1826 const int Matcher::max_vector_size(const BasicType bt) {
1827 return vector_width_in_bytes(bt)/type2aelembytes(bt);
1828 }
1829 const int Matcher::min_vector_size(const BasicType bt) {
1830 int max_size = max_vector_size(bt);
1831 // Min size which can be loaded into vector is 4 bytes.
1832 int size = (type2aelembytes(bt) == 1) ?
4 : 2;
1833 return MIN2(size,max_size);
1834 }
1835
1836 // Vector ideal reg corresponding to specified size in bytes
1837 const int Matcher::vector_ideal_reg(int size) {
1838 assert(MaxVectorSize >= size, "");
1839 switch(size) {
1840 case 4: return Op_VecS;
1841 case 8: return Op_VecD;
1842 case 16: return Op_VecX;
1843 case 32: return Op_VecY;
1844 case 64: return Op_VecZ;
1845 }
1846 ShouldNotReachHere();
1847 return 0;
1848 }
1849
1850 // Only lowest bits of xmm reg are used for vector shift count.
1851 const int Matcher::vector_shift_count_ideal_reg(int size) {
1852 return Op_VecS;
1853 }
1854
1855 // x86 supports misaligned vector stores/loads.
1856 const bool Matcher::misaligned_vectors_ok() {
1857 return !AlignVector; // can be changed by flag
1858 }
1859
1860 // x86 AES instructions are compatible with SunJCE expanded
1861 // keys, hence we do not need to pass the original key to stubs
1862 const bool Matcher::pass_original_key_for_aes() {
1863 return false;
1864 }
1865
1866
1867 const bool Matcher::convi2l_type_required = true;
1868
1869 // Check for shift by small constant as well
1870 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
1871 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
1872 shift->in(2)->get_int() <= 3 &&
1873 // Are there other uses besides address expressions?
1874 !matcher->is_visited(shift)) {
1875 address_visited.set(shift->_idx); // Flag as address_visited
1876 mstack.push(shift->in(2), Matcher::Visit);
1877 Node *conv = shift->in(1);
1878 #ifdef _LP64
1879 // Allow Matcher to match the rule which bypasses
1880 // ConvI2L operation for an array index on LP64
1881 // if the index value is positive.
1882 if (conv->Opcode() == Op_ConvI2L &&
1883 conv->as_Type()->type()->is_long()->_lo >= 0 &&
1884 // Are there other uses besides address expressions?
1885 !matcher->is_visited(conv)) {
1886 address_visited.set(conv->_idx); // Flag as address_visited
1887 mstack.push(conv->in(1), Matcher::Pre_Visit);
1888 } else
1889 #endif
1890 mstack.push(conv, Matcher::Pre_Visit);
1891 return true;
1892 }
1893 return false;
1894 }
1895
1896 // Should the Matcher clone shifts on addressing modes, expecting them
1897 // to be subsumed into complex addressing expressions or compute them
1898 // into registers?
1899 bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
1900 Node *off = m->in(AddPNode::Offset);
1901 if (off->is_Con()) {
1902 address_visited.test_set(m->_idx); // Flag as address_visited
1903 Node *adr = m->in(AddPNode::Address);
1904
1905 // Intel can handle 2 adds in addressing mode
1906 // AtomicAdd is not an addressing expression.
1907 // Cheap to find it by looking for screwy base.
1908 if (adr->is_AddP() &&
1909 !adr->in(AddPNode::Base)->is_top() &&
1910 // Are there other uses besides address expressions?
1911 !is_visited(adr)) {
1912 address_visited.set(adr->_idx); // Flag as address_visited
1913 Node *shift = adr->in(AddPNode::Offset);
1914 if (!clone_shift(shift, this, mstack, address_visited)) {
1915 mstack.push(shift, Pre_Visit);
1916 }
1917 mstack.push(adr->in(AddPNode::Address), Pre_Visit);
1918 mstack.push(adr->in(AddPNode::Base), Pre_Visit);
1919 } else {
1920 mstack.push(adr, Pre_Visit);
1921 }
1922
1923 // Clone X+offset as it also folds into most addressing expressions
1924 mstack.push(off, Visit);
1925 mstack.push(m->in(AddPNode::Base), Pre_Visit);
1926 return true;
1927 } else if (clone_shift(off, this, mstack, address_visited)) {
1928 address_visited.test_set(m->_idx); // Flag as address_visited
1929 mstack.push(m->in(AddPNode::Address), Pre_Visit);
1930 mstack.push(m->in(AddPNode::Base), Pre_Visit);
1931 return true;
1932 }
1933 return false;
1934 }
1935
1936 void Compile::reshape_address(AddPNode* addp) {
1937 }
1938
1939 // Helper methods for MachSpillCopyNode::implementation().
1940 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
1941 int src_hi, int dst_hi, uint ireg, outputStream* st) {
1942 // In the 64-bit VM size calculation is very complex, so instructions are
1943 // emitted into a scratch buffer to determine their size.
1944 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
1945 assert(ireg == Op_VecS || // 32bit vector
1946 (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
1947 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
1948 "no non-adjacent vector moves" );
1949 if (cbuf) {
1950 MacroAssembler _masm(cbuf);
1951 int offset = __ offset();
1952 switch (ireg) {
1953 case Op_VecS: // copy whole register
1954 case Op_VecD:
1955 case Op_VecX:
1956 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
1957 break;
1958 case Op_VecY:
1959 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
1960 break;
1961 case Op_VecZ:
1962 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
1963 break;
1964 default:
1965 ShouldNotReachHere();
1966 }
1967 int size = __ offset() - offset;
1968 #ifdef ASSERT
1969 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
1970 assert(!do_size || size == 4, "incorrect size calculation");
1971 #endif
1972 return size;
1973 #ifndef PRODUCT
1974 } else if (!do_size) {
1975 switch (ireg) {
1976 case Op_VecS:
1977 case Op_VecD:
1978 case Op_VecX:
1979 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
1980 break;
1981 case Op_VecY:
1982 case Op_VecZ:
1983 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
1984 break;
1985 default:
1986 ShouldNotReachHere();
1987 }
1988 #endif
1989 }
1990 // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
1991 return (UseAVX > 2) ? 6 : 4;
1992 }
1993
1994 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
1995 int stack_offset, int reg, uint ireg, outputStream* st) {
1996 // In the 64-bit VM size calculation is very complex, so instructions are
1997 // emitted into a scratch buffer to determine their size.
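// For illustration of the cases handled below: reloading a 128-bit
// (Op_VecX) spill slot at [rsp + 16] emits "movdqu xmmN, [rsp + 16]";
// 256-bit and 512-bit slots use the vmovdqu and evmovdquq forms instead,
// while Op_VecS/Op_VecD slots move through movdl/movq.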
1998 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 1999 if (cbuf) { 2000 MacroAssembler _masm(cbuf); 2001 int offset = __ offset(); 2002 if (is_load) { 2003 switch (ireg) { 2004 case Op_VecS: 2005 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2006 break; 2007 case Op_VecD: 2008 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2009 break; 2010 case Op_VecX: 2011 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2012 break; 2013 case Op_VecY: 2014 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2015 break; 2016 case Op_VecZ: 2017 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); 2018 break; 2019 default: 2020 ShouldNotReachHere(); 2021 } 2022 } else { // store 2023 switch (ireg) { 2024 case Op_VecS: 2025 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2026 break; 2027 case Op_VecD: 2028 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2029 break; 2030 case Op_VecX: 2031 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2032 break; 2033 case Op_VecY: 2034 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2035 break; 2036 case Op_VecZ: 2037 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2038 break; 2039 default: 2040 ShouldNotReachHere(); 2041 } 2042 } 2043 int size = __ offset() - offset; 2044 #ifdef ASSERT 2045 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4); 2046 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 
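// Worked size example (per the usual SSE/VEX encodings): for
// stack_offset == 0x10 a movdqu spill is the five fixed bytes (prefixes,
// opcode, ModRM, SIB) plus a one-byte disp8, so the expected size is
// 5 + 1 == 6, matching (5 + offset_size) in the assert below.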
2047 assert(!do_size || size == (5+offset_size), "incorrect size calculation");
2048 #endif
2049 return size;
2050 #ifndef PRODUCT
2051 } else if (!do_size) {
2052 if (is_load) {
2053 switch (ireg) {
2054 case Op_VecS:
2055 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
2056 break;
2057 case Op_VecD:
2058 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
2059 break;
2060 case Op_VecX:
2061 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
2062 break;
2063 case Op_VecY:
2064 case Op_VecZ:
2065 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
2066 break;
2067 default:
2068 ShouldNotReachHere();
2069 }
2070 } else { // store
2071 switch (ireg) {
2072 case Op_VecS:
2073 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
2074 break;
2075 case Op_VecD:
2076 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
2077 break;
2078 case Op_VecX:
2079 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
2080 break;
2081 case Op_VecY:
2082 case Op_VecZ:
2083 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
2084 break;
2085 default:
2086 ShouldNotReachHere();
2087 }
2088 }
2089 #endif
2090 }
2091 bool is_single_byte = false;
2092 int vec_len = 0;
2093 if ((UseAVX > 2) && (stack_offset != 0)) {
2094 int tuple_type = Assembler::EVEX_FVM;
2095 int input_size = Assembler::EVEX_32bit;
2096 switch (ireg) {
2097 case Op_VecS:
2098 tuple_type = Assembler::EVEX_T1S;
2099 break;
2100 case Op_VecD:
2101 tuple_type = Assembler::EVEX_T1S;
2102 input_size = Assembler::EVEX_64bit;
2103 break;
2104 case Op_VecX:
2105 break;
2106 case Op_VecY:
2107 vec_len = 1;
2108 break;
2109 case Op_VecZ:
2110 vec_len = 2;
2111 break;
2112 }
2113 is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0);
2114 }
2115 int offset_size = 0;
2116 int size = 5;
2117 if (UseAVX > 2 ) {
2118 if (VM_Version::supports_avx512novl() && (vec_len == 2)) {
2119 offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
2120 size += 2; // Need an additional two bytes for EVEX encoding
2121 } else if (VM_Version::supports_avx512novl() && (vec_len < 2)) {
2122 offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
2123 } else {
2124 offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
2125 size += 2; // Need an additional two bytes for EVEX encoding
2126 }
2127 } else {
2128 offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
2129 }
2130 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
2131 return size+offset_size;
2132 }
2133
2134 static inline jint replicate4_imm(int con, int width) {
2135 // Load a constant of "width" (in bytes) and replicate it to fill 32bit.
2136 assert(width == 1 || width == 2, "only byte or short types here");
2137 int bit_width = width * 8;
2138 jint val = con;
2139 val &= (1 << bit_width) - 1; // mask off sign bits
2140 while(bit_width < 32) {
2141 val |= (val << bit_width);
2142 bit_width <<= 1;
2143 }
2144 return val;
2145 }
2146
2147 static inline jlong replicate8_imm(int con, int width) {
2148 // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
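// Worked example: replicate4_imm(0x8F, 1) above masks the constant to 0x8F
// and doubles the pattern width until it fills 32 bits: 0x8F8F, then
// 0x8F8F8F8F. This routine keeps doubling to 64 bits, yielding
// 0x8F8F8F8F8F8F8F8F.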
2149 assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here"); 2150 int bit_width = width * 8; 2151 jlong val = con; 2152 val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits 2153 while(bit_width < 64) { 2154 val |= (val << bit_width); 2155 bit_width <<= 1; 2156 } 2157 return val; 2158 } 2159 2160 #ifndef PRODUCT 2161 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 2162 st->print("nop \t# %d bytes pad for loops and calls", _count); 2163 } 2164 #endif 2165 2166 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { 2167 MacroAssembler _masm(&cbuf); 2168 __ nop(_count); 2169 } 2170 2171 uint MachNopNode::size(PhaseRegAlloc*) const { 2172 return _count; 2173 } 2174 2175 #ifndef PRODUCT 2176 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 2177 st->print("# breakpoint"); 2178 } 2179 #endif 2180 2181 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { 2182 MacroAssembler _masm(&cbuf); 2183 __ int3(); 2184 } 2185 2186 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 2187 return MachNode::size(ra_); 2188 } 2189 2190 %} 2191 2192 encode %{ 2193 2194 enc_class call_epilog %{ 2195 if (VerifyStackAtCalls) { 2196 // Check that stack depth is unchanged: find majik cookie on stack 2197 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 2198 MacroAssembler _masm(&cbuf); 2199 Label L; 2200 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); 2201 __ jccb(Assembler::equal, L); 2202 // Die if stack mismatch 2203 __ int3(); 2204 __ bind(L); 2205 } 2206 %} 2207 2208 %} 2209 2210 2211 //----------OPERANDS----------------------------------------------------------- 2212 // Operand definitions must precede instruction definitions for correct parsing 2213 // in the ADLC because operands constitute user defined types which are used in 2214 // instruction definitions. 2215 2216 // This one generically applies only for evex, so only one version 2217 operand vecZ() %{ 2218 constraint(ALLOC_IN_RC(vectorz_reg)); 2219 match(VecZ); 2220 2221 format %{ %} 2222 interface(REG_INTER); 2223 %} 2224 2225 // Comparison Code for FP conditional move 2226 operand cmpOp_vcmppd() %{ 2227 match(Bool); 2228 2229 predicate(n->as_Bool()->_test._test != BoolTest::overflow && 2230 n->as_Bool()->_test._test != BoolTest::no_overflow); 2231 format %{ "" %} 2232 interface(COND_INTER) %{ 2233 equal (0x0, "eq"); 2234 less (0x1, "lt"); 2235 less_equal (0x2, "le"); 2236 not_equal (0xC, "ne"); 2237 greater_equal(0xD, "ge"); 2238 greater (0xE, "gt"); 2239 //TODO cannot compile (adlc breaks) without two next lines with error: 2240 // x86_64.ad(13987) Syntax Error: :In operand cmpOp_vcmppd: Do not support this encode constant: ' %{ 2241 // equal' for overflow. 
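// For reference (per the AVX compare predicate immediate table): the
// encodings above line up with the imm8 predicates taken by vcmppd
// (0x0 EQ_OQ, 0x1 LT_OS, 0x2 LE_OS, 0xC NEQ_OQ, 0xD GE_OS, 0xE GT_OS);
// the 0x20/0x21 values below are out-of-range placeholders only.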
2242 overflow (0x20, "o"); // not really supported by the instruction 2243 no_overflow (0x21, "no"); // not really supported by the instruction 2244 %} 2245 %} 2246 2247 2248 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 2249 2250 // ============================================================================ 2251 2252 instruct ShouldNotReachHere() %{ 2253 match(Halt); 2254 format %{ "int3\t# ShouldNotReachHere" %} 2255 ins_encode %{ 2256 __ int3(); 2257 %} 2258 ins_pipe(pipe_slow); 2259 %} 2260 2261 // =================================EVEX special=============================== 2262 2263 instruct setMask(rRegI dst, rRegI src) %{ 2264 predicate(Matcher::has_predicated_vectors()); 2265 match(Set dst (SetVectMaskI src)); 2266 effect(TEMP dst); 2267 format %{ "setvectmask $dst, $src" %} 2268 ins_encode %{ 2269 __ setvectmask($dst$$Register, $src$$Register); 2270 %} 2271 ins_pipe(pipe_slow); 2272 %} 2273 2274 // ============================================================================ 2275 2276 instruct addF_reg(regF dst, regF src) %{ 2277 predicate((UseSSE>=1) && (UseAVX == 0)); 2278 match(Set dst (AddF dst src)); 2279 2280 format %{ "addss $dst, $src" %} 2281 ins_cost(150); 2282 ins_encode %{ 2283 __ addss($dst$$XMMRegister, $src$$XMMRegister); 2284 %} 2285 ins_pipe(pipe_slow); 2286 %} 2287 2288 instruct addF_mem(regF dst, memory src) %{ 2289 predicate((UseSSE>=1) && (UseAVX == 0)); 2290 match(Set dst (AddF dst (LoadF src))); 2291 2292 format %{ "addss $dst, $src" %} 2293 ins_cost(150); 2294 ins_encode %{ 2295 __ addss($dst$$XMMRegister, $src$$Address); 2296 %} 2297 ins_pipe(pipe_slow); 2298 %} 2299 2300 instruct addF_imm(regF dst, immF con) %{ 2301 predicate((UseSSE>=1) && (UseAVX == 0)); 2302 match(Set dst (AddF dst con)); 2303 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2304 ins_cost(150); 2305 ins_encode %{ 2306 __ addss($dst$$XMMRegister, $constantaddress($con)); 2307 %} 2308 ins_pipe(pipe_slow); 2309 %} 2310 2311 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 2312 predicate(UseAVX > 0); 2313 match(Set dst (AddF src1 src2)); 2314 2315 format %{ "vaddss $dst, $src1, $src2" %} 2316 ins_cost(150); 2317 ins_encode %{ 2318 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2319 %} 2320 ins_pipe(pipe_slow); 2321 %} 2322 2323 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 2324 predicate(UseAVX > 0); 2325 match(Set dst (AddF src1 (LoadF src2))); 2326 2327 format %{ "vaddss $dst, $src1, $src2" %} 2328 ins_cost(150); 2329 ins_encode %{ 2330 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2331 %} 2332 ins_pipe(pipe_slow); 2333 %} 2334 2335 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 2336 predicate(UseAVX > 0); 2337 match(Set dst (AddF src con)); 2338 2339 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2340 ins_cost(150); 2341 ins_encode %{ 2342 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2343 %} 2344 ins_pipe(pipe_slow); 2345 %} 2346 2347 instruct addD_reg(regD dst, regD src) %{ 2348 predicate((UseSSE>=2) && (UseAVX == 0)); 2349 match(Set dst (AddD dst src)); 2350 2351 format %{ "addsd $dst, $src" %} 2352 ins_cost(150); 2353 ins_encode %{ 2354 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 2355 %} 2356 ins_pipe(pipe_slow); 2357 %} 2358 2359 instruct addD_mem(regD dst, memory src) %{ 2360 predicate((UseSSE>=2) && (UseAVX == 0)); 2361 match(Set dst (AddD dst (LoadD src))); 2362 
2363 format %{ "addsd $dst, $src" %} 2364 ins_cost(150); 2365 ins_encode %{ 2366 __ addsd($dst$$XMMRegister, $src$$Address); 2367 %} 2368 ins_pipe(pipe_slow); 2369 %} 2370 2371 instruct addD_imm(regD dst, immD con) %{ 2372 predicate((UseSSE>=2) && (UseAVX == 0)); 2373 match(Set dst (AddD dst con)); 2374 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2375 ins_cost(150); 2376 ins_encode %{ 2377 __ addsd($dst$$XMMRegister, $constantaddress($con)); 2378 %} 2379 ins_pipe(pipe_slow); 2380 %} 2381 2382 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 2383 predicate(UseAVX > 0); 2384 match(Set dst (AddD src1 src2)); 2385 2386 format %{ "vaddsd $dst, $src1, $src2" %} 2387 ins_cost(150); 2388 ins_encode %{ 2389 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2390 %} 2391 ins_pipe(pipe_slow); 2392 %} 2393 2394 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 2395 predicate(UseAVX > 0); 2396 match(Set dst (AddD src1 (LoadD src2))); 2397 2398 format %{ "vaddsd $dst, $src1, $src2" %} 2399 ins_cost(150); 2400 ins_encode %{ 2401 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2402 %} 2403 ins_pipe(pipe_slow); 2404 %} 2405 2406 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 2407 predicate(UseAVX > 0); 2408 match(Set dst (AddD src con)); 2409 2410 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2411 ins_cost(150); 2412 ins_encode %{ 2413 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2414 %} 2415 ins_pipe(pipe_slow); 2416 %} 2417 2418 instruct subF_reg(regF dst, regF src) %{ 2419 predicate((UseSSE>=1) && (UseAVX == 0)); 2420 match(Set dst (SubF dst src)); 2421 2422 format %{ "subss $dst, $src" %} 2423 ins_cost(150); 2424 ins_encode %{ 2425 __ subss($dst$$XMMRegister, $src$$XMMRegister); 2426 %} 2427 ins_pipe(pipe_slow); 2428 %} 2429 2430 instruct subF_mem(regF dst, memory src) %{ 2431 predicate((UseSSE>=1) && (UseAVX == 0)); 2432 match(Set dst (SubF dst (LoadF src))); 2433 2434 format %{ "subss $dst, $src" %} 2435 ins_cost(150); 2436 ins_encode %{ 2437 __ subss($dst$$XMMRegister, $src$$Address); 2438 %} 2439 ins_pipe(pipe_slow); 2440 %} 2441 2442 instruct subF_imm(regF dst, immF con) %{ 2443 predicate((UseSSE>=1) && (UseAVX == 0)); 2444 match(Set dst (SubF dst con)); 2445 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2446 ins_cost(150); 2447 ins_encode %{ 2448 __ subss($dst$$XMMRegister, $constantaddress($con)); 2449 %} 2450 ins_pipe(pipe_slow); 2451 %} 2452 2453 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 2454 predicate(UseAVX > 0); 2455 match(Set dst (SubF src1 src2)); 2456 2457 format %{ "vsubss $dst, $src1, $src2" %} 2458 ins_cost(150); 2459 ins_encode %{ 2460 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2461 %} 2462 ins_pipe(pipe_slow); 2463 %} 2464 2465 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 2466 predicate(UseAVX > 0); 2467 match(Set dst (SubF src1 (LoadF src2))); 2468 2469 format %{ "vsubss $dst, $src1, $src2" %} 2470 ins_cost(150); 2471 ins_encode %{ 2472 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2473 %} 2474 ins_pipe(pipe_slow); 2475 %} 2476 2477 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 2478 predicate(UseAVX > 0); 2479 match(Set dst (SubF src con)); 2480 2481 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2482 ins_cost(150); 2483 ins_encode %{ 

instruct subF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (SubF dst src));

  format %{ "subss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (SubF dst (LoadF src)));

  format %{ "subss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (SubF dst con));
  format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 src2));

  format %{ "vsubss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 (LoadF src2)));

  format %{ "vsubss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src con));

  format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (SubD dst src));

  format %{ "subsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (SubD dst (LoadD src)));

  format %{ "subsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (SubD dst con));
  format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 src2));

  format %{ "vsubsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 (LoadD src2)));

  format %{ "vsubsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src con));

  format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst src));

  format %{ "mulss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst (LoadF src)));

  format %{ "mulss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst con));
  format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 src2));

  format %{ "vmulss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "vmulss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src con));

  format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst src));

  format %{ "mulsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst (LoadD src)));

  format %{ "mulsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst con));
  format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 src2));

  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 (LoadD src2)));

  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src con));

  format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst src));

  format %{ "divss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst (LoadF src)));

  format %{ "divss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst con));
  format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 src2));

  format %{ "vdivss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 (LoadF src2)));

  format %{ "vdivss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src con));

  format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst src));

  format %{ "divsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst (LoadD src)));

  format %{ "divsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst con));
  format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 src2));

  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 (LoadD src2)));

  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src con));

  format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
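
// Abs and neg below are implemented as bit operations: and'ing with the
// sign-mask constant (0x7fffffff / 0x7fffffffffffffff) clears the IEEE-754
// sign bit, xor'ing with the sign-flip constant (0x80000000 /
// 0x8000000000000000) inverts it. float_signmask() and friends return the
// addresses of those preallocated constants.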

instruct absF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AbsF dst));
  ins_cost(150);
  format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absF_reg_reg(regF dst, regF src) %{
  predicate(VM_Version::supports_avxonly());
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

#ifdef _LP64
instruct absF_reg_reg_evex(regF dst, regF src) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

instruct absF_reg_reg_evex_special(regF dst, regF src1, regF src2) %{
  predicate(VM_Version::supports_avx512novl());
  match(Set dst (AbsF src1));
  effect(TEMP src2);
  ins_cost(150);
  format %{ "vabsss $dst, $src1, $src2, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vabsss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
#else // _LP64
instruct absF_reg_reg_evex(regF dst, regF src) %{
  predicate(UseAVX > 2);
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
#endif
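
// The *_evex_special variants (predicate VM_Version::supports_avx512novl())
// handle AVX-512 chips that lack the VL extension: there the 128-bit logic
// ops cannot be EVEX-encoded and a VEX encoding cannot reach XMM16-XMM31,
// so the vabsss/vabssd macro-assembler helpers, with an extra TEMP register,
// are used instead.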

instruct absD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AbsD dst));
  ins_cost(150);
  format %{ "andpd $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg_reg(regD dst, regD src) %{
  predicate(VM_Version::supports_avxonly());
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

#ifdef _LP64
instruct absD_reg_reg_evex(regD dst, regD src) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg_reg_evex_special(regD dst, regD src1, regD src2) %{
  predicate(VM_Version::supports_avx512novl());
  match(Set dst (AbsD src1));
  effect(TEMP src2);
  ins_cost(150);
  format %{ "vabssd $dst, $src1, $src2, [0x7fffffffffffffff]\t# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vabssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
#else // _LP64
instruct absD_reg_reg_evex(regD dst, regD src) %{
  predicate(UseAVX > 2);
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
#endif

instruct negF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (NegF dst));
  ins_cost(150);
  format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negF_reg_reg(regF dst, regF src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegF src));
  ins_cost(150);
  format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (NegD dst));
  ins_cost(150);
  format %{ "xorpd $dst, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg_reg(regD dst, regD src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegD src));
  ins_cost(150);
  format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}
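
// There is no SqrtF ideal node here; Java's Math.sqrt() operates on double,
// so a float square root only reaches the matcher as the
// ConvD2F(SqrtD(ConvF2D(x))) idiom matched below. That widen-sqrt-narrow
// round trip produces exactly the correctly rounded float result that a
// single sqrtss computes.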

instruct sqrtF_reg(regF dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_mem(regF dst, memory src) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_imm(regF dst, immF con) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D con))));
  format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_reg(regD dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD src));

  format %{ "sqrtsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_mem(regD dst, memory src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD (LoadD src)));

  format %{ "sqrtsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_imm(regD dst, immD con) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD con));
  format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct onspinwait() %{
  match(OnSpinWait);
  ins_cost(200);

  format %{
    $$template
    if (os::is_MP()) {
      $$emit$$"pause\t! membar_onspinwait"
    } else {
      $$emit$$"MEMBAR-onspinwait ! (empty encoding)"
    }
  %}
  ins_encode %{
    __ pause();
  %}
  ins_pipe(pipe_slow);
%}

// ====================VECTOR INSTRUCTIONS=====================================
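
// Vector operand classes encode the access size: vecS = 4 bytes, vecD = 8,
// vecX = 16 (XMM), vecY = 32 (YMM), vecZ = 64 (ZMM). The vector_len value
// handed to the AVX/EVEX assembler routines selects the encoded vector
// length the same way: 0 = 128-bit, 1 = 256-bit, 2 = 512-bit.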

// Load vectors (4 bytes long)
instruct loadV4(vecS dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movd $dst,$mem\t! load vector (4 bytes)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (8 bytes long)
instruct loadV8(vecD dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movq $dst,$mem\t! load vector (8 bytes)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (16 bytes long)
instruct loadV16(vecX dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %}
  ins_encode %{
    __ movdqu($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (32 bytes long)
instruct loadV32(vecY dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 32);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (64 bytes long)
instruct loadV64_dword(vecZ dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() <= 4);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdqul $dst k0,$mem\t! load vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdqul($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (64 bytes long)
instruct loadV64_qword(vecZ dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() > 4);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdquq $dst k0,$mem\t! load vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdquq($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Store vectors
instruct storeV4(memory mem, vecS src) %{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movd $mem,$src\t! store vector (4 bytes)" %}
  ins_encode %{
    __ movdl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV8(memory mem, vecD src) %{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movq $mem,$src\t! store vector (8 bytes)" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV16(memory mem, vecX src) %{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %}
  ins_encode %{
    __ movdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV32(memory mem, vecY src) %{
  predicate(n->as_StoreVector()->memory_size() == 32);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV64_dword(memory mem, vecZ src) %{
  predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() <= 4);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdqul $mem k0,$src\t! store vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdqul($mem$$Address, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV64_qword(memory mem, vecZ src) %{
  predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() > 4);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdquq $mem k0,$src\t! store vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdquq($mem$$Address, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ====================LEGACY REPLICATE=======================================
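
// The replicate forms in this section are fenced with
// !VM_Version::supports_avx512vlbw() (byte/short) or
// !VM_Version::supports_avx512vl() (int/long/float/double) so that on
// AVX-512 hardware the single-instruction EVEX broadcasts in the EVEX
// REPLICATE section below match instead of these shuffle sequences.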

instruct Repl4B_mem(vecS dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "punpcklbw $dst,$mem\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4B" %}
  ins_encode %{
    __ punpcklbw($dst$$XMMRegister, $mem$$Address);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "punpcklbw $dst,$mem\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate8B" %}
  ins_encode %{
    __ punpcklbw($dst$$XMMRegister, $mem$$Address);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\t! replicate16B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "punpcklbw $dst,$mem\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\t! replicate16B" %}
  ins_encode %{
    __ punpcklbw($dst$$XMMRegister, $mem$$Address);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate32B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "punpcklbw $dst,$mem\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate32B" %}
  ins_encode %{
    __ punpcklbw($dst$$XMMRegister, $mem$$Address);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate16B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate32B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
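
// replicate8_imm(con, width), used by the _imm forms, widens the immediate
// to 64 bits by repeating its low `width` bytes 8/width times; one movq from
// the constant table therefore already delivers eight bytes of splatted
// value, and only the cross-lane copies remain to be done.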

instruct Repl4S(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4S_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "pshuflw $dst,$mem,0x00\t! replicate4S" %}
  ins_encode %{
    __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\t! replicate8S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "pshuflw $dst,$mem,0x00\n\t"
            "punpcklqdq $dst,$dst\t! replicate8S" %}
  ins_encode %{
    __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate8S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate16S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "pshuflw $dst,$mem,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate16S" %}
  ins_encode %{
    __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate16S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate4I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "pshufd $dst,$mem,0x00\t! replicate4I" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\n\t"
            "vinserti128_high $dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "pshufd $dst,$mem,0x00\n\t"
            "vinserti128_high $dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t"
            "punpcklqdq $dst,$dst" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// A long can be loaded into an xmm register directly from memory.
instruct Repl2L_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "movq $dst,$mem\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate long (8 byte) scalar to be vector
#ifdef _LP64
instruct Repl4L(vecY dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64
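
// In the 32-bit build (#else branch above) a long occupies a register pair
// and there is no 64-bit GPR-to-XMM move, so the halves are inserted
// separately: movdl the low word, movdl the high word into a temp, and
// punpckldq glues them into one 64-bit lane before the quadword splat.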

instruct Repl4L_imm(vecY dst, immL con) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate4L($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4L_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "movq $dst,$mem\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl2F_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "pshufd $dst,$mem,0x00\t! replicate2F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4F_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "pshufd $dst,$mem,0x00\t! replicate4F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8F(vecY dst, regF src) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\n\t"
            "vinsertf128_high $dst,$dst\t! replicate8F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8F_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "pshufd $dst,$mem,0x00\n\t"
            "vinsertf128_high $dst,$dst\t! replicate8F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl2F_zero(vecD dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX < 3);
  match(Set dst (ReplicateF zero));
  format %{ "xorps $dst,$dst\t! replicate2F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F_zero(vecX dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX < 3);
  match(Set dst (ReplicateF zero));
  format %{ "xorps $dst,$dst\t! replicate4F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8F_zero(vecY dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX < 3);
  match(Set dst (ReplicateF zero));
  format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %}
  ins_encode %{
    int vector_len = 1;
    __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}
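
// pshufd immediates are four 2-bit dword selectors: 0x00 broadcasts dword 0
// to all positions (the float splat), while 0x44 selects dwords 0,1,0,1 and
// thus duplicates the low 64-bit lane, giving the double splat used by the
// replicate2D/replicate4D forms below.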

instruct Repl2D_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "pshufd $dst,$mem,0x44\t! replicate2D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4D(vecY dst, regD src) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\n\t"
            "vinsertf128_high $dst,$dst\t! replicate4D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4D_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "pshufd $dst,$mem,0x44\n\t"
            "vinsertf128_high $dst,$dst\t! replicate4D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44);
    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate double (8 byte) scalar zero to be vector
instruct Repl2D_zero(vecX dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX < 3);
  match(Set dst (ReplicateD zero));
  format %{ "xorpd $dst,$dst\t! replicate2D zero" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4D_zero(vecY dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX < 3);
  match(Set dst (ReplicateD zero));
  format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %}
  ins_encode %{
    int vector_len = 1;
    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// ====================GENERIC REPLICATE==========================================
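
// The generic forms below are predicated only on vector length: they use
// baseline SSE2 sequences that are legal on every CPU this matcher targets,
// and serve as the fallback when none of the feature-guarded variants apply.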

// Replicate byte scalar to be vector
instruct Repl4B(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate8B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate byte scalar immediate to be vector by loading from const table.
instruct Repl4B_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB con));
  format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1)));
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
  %}
  ins_pipe( pipe_slow );
%}

// Replicate byte scalar zero to be vector
instruct Repl4B_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate4B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8B_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate8B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16B_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate16B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl32B_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %}
  ins_encode %{
    // vpxor here is the MacroAssembler version: plain AVX has no 256-bit
    // vpxor (AVX2 adds it), so it falls back to vxorpd in that case.
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate char/short (2 byte) scalar to be vector
instruct Repl2S(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate2S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
instruct Repl2S_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS con));
  format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate char/short (2 byte) scalar zero to be vector
instruct Repl2S_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate2S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate4S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8S_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate8S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16S_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %}
  ins_encode %{
    // vpxor here is the MacroAssembler version: plain AVX has no 256-bit
    // vpxor (AVX2 adds it), so it falls back to vxorpd in that case.
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar to be vector
instruct Repl2I(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// An integer can be loaded into an xmm register directly from memory.
instruct Repl2I_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
instruct Repl2I_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar zero to be vector
instruct Repl2I_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate2I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate4I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8I_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %}
  ins_encode %{
    // vpxor here is the MacroAssembler version: plain AVX has no 256-bit
    // vpxor (AVX2 adds it), so it falls back to vxorpd in that case.
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate long (8 byte) scalar to be vector
#ifdef _LP64
instruct Repl2L(vecX dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64

// Replicate long (8 byte) scalar immediate to be vector by loading from const table.
instruct Repl2L_imm(vecX dst, immL con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate long (8 byte) scalar zero to be vector
instruct Repl2L_zero(vecX dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL zero));
  format %{ "pxor $dst,$dst\t! replicate2L zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4L_zero(vecY dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %}
  ins_encode %{
    // vpxor here is the MacroAssembler version: plain AVX has no 256-bit
    // vpxor (AVX2 adds it), so it falls back to vxorpd in that case.
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate float (4 byte) scalar to be vector
instruct Repl2F(vecD dst, regF src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\t! replicate2F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F(vecX dst, regF src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\t! replicate4F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate double (8 bytes) scalar to be vector
instruct Repl2D(vecX dst, regD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\t! replicate2D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
  %}
  ins_pipe( pipe_slow );
%}

// ====================EVEX REPLICATE=============================================
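
// EVEX replicate: AVX-512 has vpbroadcastb/w from either a general register
// or memory as a single instruction. The byte/word forms below require
// AVX512BW, and the 128/256-bit variants (vector_len 0/1) additionally
// require AVX512VL, which is exactly what the instruct predicates check.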
instruct Repl4B_mem_evex(vecS dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb  $dst,$mem\t! replicate4B" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B_mem_evex(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb  $dst,$mem\t! replicate8B" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_evex(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB src));
  format %{ "vpbroadcastb  $dst,$src\t! replicate16B" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb  $dst,$mem\t! replicate16B" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_evex(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB src));
  format %{ "vpbroadcastb  $dst,$src\t! replicate32B" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb  $dst,$mem\t! replicate32B" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_evex(vecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateB src));
  format %{ "vpbroadcastb  $dst,$src\t! replicate64B" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb  $dst,$mem\t! replicate64B" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_imm_evex(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB con));
  format %{ "movq    $dst,[$constantaddress]\n\t"
            "vpbroadcastb $dst,$dst\t! replicate16B" %}
  ins_encode %{
    int vector_len = 0;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_imm_evex(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB con));
  format %{ "movq    $dst,[$constantaddress]\n\t"
            "vpbroadcastb $dst,$dst\t! replicate32B" %}
  ins_encode %{
    int vector_len = 1;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_imm_evex(vecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateB con));
  format %{ "movq    $dst,[$constantaddress]\n\t"
            "vpbroadcastb $dst,$dst\t! replicate64B" %}
  ins_encode %{
    int vector_len = 2;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_zero_evex(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 64 && UseAVX > 2);
  match(Set dst (ReplicateB zero));
  format %{ "vpxor   $dst k0,$dst,$dst\t! replicate64B zero" %}
  ins_encode %{
    // vpxor needs EVEX for 512-bit operands; guaranteed here by UseAVX > 2.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}
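// Byte and word replicates need AVX512BW: the 128/256-bit forms additionally
// require AVX512VL (supports_avx512vlbw()), while the 512-bit forms only
// check supports_avx512bw().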
replicate4S" %} 4261 ins_encode %{ 4262 int vector_len = 0; 4263 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4264 %} 4265 ins_pipe( pipe_slow ); 4266 %} 4267 4268 instruct Repl8S_evex(vecX dst, rRegI src) %{ 4269 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 4270 match(Set dst (ReplicateS src)); 4271 format %{ "vpbroadcastw $dst,$src\t! replicate8S" %} 4272 ins_encode %{ 4273 int vector_len = 0; 4274 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4275 %} 4276 ins_pipe( pipe_slow ); 4277 %} 4278 4279 instruct Repl8S_mem_evex(vecX dst, memory mem) %{ 4280 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 4281 match(Set dst (ReplicateS (LoadS mem))); 4282 format %{ "vpbroadcastw $dst,$mem\t! replicate8S" %} 4283 ins_encode %{ 4284 int vector_len = 0; 4285 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4286 %} 4287 ins_pipe( pipe_slow ); 4288 %} 4289 4290 instruct Repl16S_evex(vecY dst, rRegI src) %{ 4291 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4292 match(Set dst (ReplicateS src)); 4293 format %{ "vpbroadcastw $dst,$src\t! replicate16S" %} 4294 ins_encode %{ 4295 int vector_len = 1; 4296 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4297 %} 4298 ins_pipe( pipe_slow ); 4299 %} 4300 4301 instruct Repl16S_mem_evex(vecY dst, memory mem) %{ 4302 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4303 match(Set dst (ReplicateS (LoadS mem))); 4304 format %{ "vpbroadcastw $dst,$mem\t! replicate16S" %} 4305 ins_encode %{ 4306 int vector_len = 1; 4307 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4308 %} 4309 ins_pipe( pipe_slow ); 4310 %} 4311 4312 instruct Repl32S_evex(vecZ dst, rRegI src) %{ 4313 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw()); 4314 match(Set dst (ReplicateS src)); 4315 format %{ "vpbroadcastw $dst,$src\t! replicate32S" %} 4316 ins_encode %{ 4317 int vector_len = 2; 4318 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4319 %} 4320 ins_pipe( pipe_slow ); 4321 %} 4322 4323 instruct Repl32S_mem_evex(vecZ dst, memory mem) %{ 4324 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw()); 4325 match(Set dst (ReplicateS (LoadS mem))); 4326 format %{ "vpbroadcastw $dst,$mem\t! replicate32S" %} 4327 ins_encode %{ 4328 int vector_len = 2; 4329 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4330 %} 4331 ins_pipe( pipe_slow ); 4332 %} 4333 4334 instruct Repl8S_imm_evex(vecX dst, immI con) %{ 4335 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 4336 match(Set dst (ReplicateS con)); 4337 format %{ "movq $dst,[$constantaddress]\n\t" 4338 "vpbroadcastw $dst,$dst\t! replicate8S" %} 4339 ins_encode %{ 4340 int vector_len = 0; 4341 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4342 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4343 %} 4344 ins_pipe( pipe_slow ); 4345 %} 4346 4347 instruct Repl16S_imm_evex(vecY dst, immI con) %{ 4348 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4349 match(Set dst (ReplicateS con)); 4350 format %{ "movq $dst,[$constantaddress]\n\t" 4351 "vpbroadcastw $dst,$dst\t! 
replicate16S" %} 4352 ins_encode %{ 4353 int vector_len = 1; 4354 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4355 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4356 %} 4357 ins_pipe( pipe_slow ); 4358 %} 4359 4360 instruct Repl32S_imm_evex(vecZ dst, immI con) %{ 4361 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw()); 4362 match(Set dst (ReplicateS con)); 4363 format %{ "movq $dst,[$constantaddress]\n\t" 4364 "vpbroadcastw $dst,$dst\t! replicate32S" %} 4365 ins_encode %{ 4366 int vector_len = 2; 4367 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4368 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4369 %} 4370 ins_pipe( pipe_slow ); 4371 %} 4372 4373 instruct Repl32S_zero_evex(vecZ dst, immI0 zero) %{ 4374 predicate(n->as_Vector()->length() == 32 && UseAVX > 2); 4375 match(Set dst (ReplicateS zero)); 4376 format %{ "vpxor $dst k0,$dst,$dst\t! replicate32S zero" %} 4377 ins_encode %{ 4378 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 4379 int vector_len = 2; 4380 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4381 %} 4382 ins_pipe( fpu_reg_reg ); 4383 %} 4384 4385 instruct Repl4I_evex(vecX dst, rRegI src) %{ 4386 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4387 match(Set dst (ReplicateI src)); 4388 format %{ "vpbroadcastd $dst,$src\t! replicate4I" %} 4389 ins_encode %{ 4390 int vector_len = 0; 4391 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4392 %} 4393 ins_pipe( pipe_slow ); 4394 %} 4395 4396 instruct Repl4I_mem_evex(vecX dst, memory mem) %{ 4397 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4398 match(Set dst (ReplicateI (LoadI mem))); 4399 format %{ "vpbroadcastd $dst,$mem\t! replicate4I" %} 4400 ins_encode %{ 4401 int vector_len = 0; 4402 __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4403 %} 4404 ins_pipe( pipe_slow ); 4405 %} 4406 4407 instruct Repl8I_evex(vecY dst, rRegI src) %{ 4408 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4409 match(Set dst (ReplicateI src)); 4410 format %{ "vpbroadcastd $dst,$src\t! replicate8I" %} 4411 ins_encode %{ 4412 int vector_len = 1; 4413 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4414 %} 4415 ins_pipe( pipe_slow ); 4416 %} 4417 4418 instruct Repl8I_mem_evex(vecY dst, memory mem) %{ 4419 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4420 match(Set dst (ReplicateI (LoadI mem))); 4421 format %{ "vpbroadcastd $dst,$mem\t! replicate8I" %} 4422 ins_encode %{ 4423 int vector_len = 1; 4424 __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4425 %} 4426 ins_pipe( pipe_slow ); 4427 %} 4428 4429 instruct Repl16I_evex(vecZ dst, rRegI src) %{ 4430 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4431 match(Set dst (ReplicateI src)); 4432 format %{ "vpbroadcastd $dst,$src\t! replicate16I" %} 4433 ins_encode %{ 4434 int vector_len = 2; 4435 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4436 %} 4437 ins_pipe( pipe_slow ); 4438 %} 4439 4440 instruct Repl16I_mem_evex(vecZ dst, memory mem) %{ 4441 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4442 match(Set dst (ReplicateI (LoadI mem))); 4443 format %{ "vpbroadcastd $dst,$mem\t! 
replicate16I" %} 4444 ins_encode %{ 4445 int vector_len = 2; 4446 __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4447 %} 4448 ins_pipe( pipe_slow ); 4449 %} 4450 4451 instruct Repl4I_imm_evex(vecX dst, immI con) %{ 4452 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4453 match(Set dst (ReplicateI con)); 4454 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 4455 "vpbroadcastd $dst,$dst\t! replicate4I" %} 4456 ins_encode %{ 4457 int vector_len = 0; 4458 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4459 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4460 %} 4461 ins_pipe( pipe_slow ); 4462 %} 4463 4464 instruct Repl8I_imm_evex(vecY dst, immI con) %{ 4465 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4466 match(Set dst (ReplicateI con)); 4467 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 4468 "vpbroadcastd $dst,$dst\t! replicate8I" %} 4469 ins_encode %{ 4470 int vector_len = 1; 4471 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4472 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4473 %} 4474 ins_pipe( pipe_slow ); 4475 %} 4476 4477 instruct Repl16I_imm_evex(vecZ dst, immI con) %{ 4478 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4479 match(Set dst (ReplicateI con)); 4480 format %{ "movq $dst,[$constantaddress]\t! replicate16I($con)\n\t" 4481 "vpbroadcastd $dst,$dst\t! replicate16I" %} 4482 ins_encode %{ 4483 int vector_len = 2; 4484 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4485 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4486 %} 4487 ins_pipe( pipe_slow ); 4488 %} 4489 4490 instruct Repl16I_zero_evex(vecZ dst, immI0 zero) %{ 4491 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4492 match(Set dst (ReplicateI zero)); 4493 format %{ "vpxor $dst k0,$dst,$dst\t! replicate16I zero" %} 4494 ins_encode %{ 4495 // Use vxorpd since AVX does not have vpxor for 512-bit (AVX2 will have it). 4496 int vector_len = 2; 4497 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4498 %} 4499 ins_pipe( fpu_reg_reg ); 4500 %} 4501 4502 // Replicate long (8 byte) scalar to be vector 4503 #ifdef _LP64 4504 instruct Repl4L_evex(vecY dst, rRegL src) %{ 4505 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4506 match(Set dst (ReplicateL src)); 4507 format %{ "vpbroadcastq $dst,$src\t! replicate4L" %} 4508 ins_encode %{ 4509 int vector_len = 1; 4510 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); 4511 %} 4512 ins_pipe( pipe_slow ); 4513 %} 4514 4515 instruct Repl8L_evex(vecZ dst, rRegL src) %{ 4516 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4517 match(Set dst (ReplicateL src)); 4518 format %{ "vpbroadcastq $dst,$src\t! replicate8L" %} 4519 ins_encode %{ 4520 int vector_len = 2; 4521 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); 4522 %} 4523 ins_pipe( pipe_slow ); 4524 %} 4525 #else // _LP64 4526 instruct Repl4L_evex(vecY dst, eRegL src, regD tmp) %{ 4527 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4528 match(Set dst (ReplicateL src)); 4529 effect(TEMP dst, USE src, TEMP tmp); 4530 format %{ "movdl $dst,$src.lo\n\t" 4531 "movdl $tmp,$src.hi\n\t" 4532 "punpckldq $dst,$tmp\n\t" 4533 "vpbroadcastq $dst,$dst\t! 
replicate4L" %} 4534 ins_encode %{ 4535 int vector_len = 1; 4536 __ movdl($dst$$XMMRegister, $src$$Register); 4537 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4538 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4539 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4540 %} 4541 ins_pipe( pipe_slow ); 4542 %} 4543 4544 instruct Repl8L_evex(vecZ dst, eRegL src, regD tmp) %{ 4545 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4546 match(Set dst (ReplicateL src)); 4547 effect(TEMP dst, USE src, TEMP tmp); 4548 format %{ "movdl $dst,$src.lo\n\t" 4549 "movdl $tmp,$src.hi\n\t" 4550 "punpckldq $dst,$tmp\n\t" 4551 "vpbroadcastq $dst,$dst\t! replicate8L" %} 4552 ins_encode %{ 4553 int vector_len = 2; 4554 __ movdl($dst$$XMMRegister, $src$$Register); 4555 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4556 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4557 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4558 %} 4559 ins_pipe( pipe_slow ); 4560 %} 4561 #endif // _LP64 4562 4563 instruct Repl4L_imm_evex(vecY dst, immL con) %{ 4564 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4565 match(Set dst (ReplicateL con)); 4566 format %{ "movq $dst,[$constantaddress]\n\t" 4567 "vpbroadcastq $dst,$dst\t! replicate4L" %} 4568 ins_encode %{ 4569 int vector_len = 1; 4570 __ movq($dst$$XMMRegister, $constantaddress($con)); 4571 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4572 %} 4573 ins_pipe( pipe_slow ); 4574 %} 4575 4576 instruct Repl8L_imm_evex(vecZ dst, immL con) %{ 4577 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4578 match(Set dst (ReplicateL con)); 4579 format %{ "movq $dst,[$constantaddress]\n\t" 4580 "vpbroadcastq $dst,$dst\t! replicate8L" %} 4581 ins_encode %{ 4582 int vector_len = 2; 4583 __ movq($dst$$XMMRegister, $constantaddress($con)); 4584 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4585 %} 4586 ins_pipe( pipe_slow ); 4587 %} 4588 4589 instruct Repl2L_mem_evex(vecX dst, memory mem) %{ 4590 predicate(n->as_Vector()->length() == 2 && VM_Version::supports_avx512vl()); 4591 match(Set dst (ReplicateL (LoadL mem))); 4592 format %{ "vpbroadcastd $dst,$mem\t! replicate2L" %} 4593 ins_encode %{ 4594 int vector_len = 0; 4595 __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4596 %} 4597 ins_pipe( pipe_slow ); 4598 %} 4599 4600 instruct Repl4L_mem_evex(vecY dst, memory mem) %{ 4601 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4602 match(Set dst (ReplicateL (LoadL mem))); 4603 format %{ "vpbroadcastd $dst,$mem\t! replicate4L" %} 4604 ins_encode %{ 4605 int vector_len = 1; 4606 __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4607 %} 4608 ins_pipe( pipe_slow ); 4609 %} 4610 4611 instruct Repl8L_mem_evex(vecZ dst, memory mem) %{ 4612 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4613 match(Set dst (ReplicateL (LoadL mem))); 4614 format %{ "vpbroadcastd $dst,$mem\t! replicate8L" %} 4615 ins_encode %{ 4616 int vector_len = 2; 4617 __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4618 %} 4619 ins_pipe( pipe_slow ); 4620 %} 4621 4622 instruct Repl8L_zero_evex(vecZ dst, immL0 zero) %{ 4623 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4624 match(Set dst (ReplicateL zero)); 4625 format %{ "vpxor $dst k0,$dst,$dst\t! replicate8L zero" %} 4626 ins_encode %{ 4627 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 
instruct Repl8L_zero_evex(vecZ dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL zero));
  format %{ "vpxor   $dst k0,$dst,$dst\t! replicate8L zero" %}
  ins_encode %{
    // vpxor needs EVEX for 512-bit operands; guaranteed here by UseAVX > 2.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8F_evex(vecY dst, regF src) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF src));
  format %{ "vbroadcastss $dst,$src\t! replicate8F" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8F_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "vbroadcastss $dst,$mem\t! replicate8F" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16F_evex(vecZ dst, regF src) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateF src));
  format %{ "vbroadcastss $dst,$src\t! replicate16F" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16F_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "vbroadcastss $dst,$mem\t! replicate16F" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl2F_zero_evex(vecD dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vpxor   $dst k0,$dst,$dst\t! replicate2F zero" %}
  ins_encode %{
    // Use vpxor instead of vxorps: 512-bit vxorps requires AVX512DQ,
    // while vpxor only needs AVX512F.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F_zero_evex(vecX dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vpxor   $dst k0,$dst,$dst\t! replicate4F zero" %}
  ins_encode %{
    // Use vpxor instead of vxorps: 512-bit vxorps requires AVX512DQ,
    // while vpxor only needs AVX512F.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8F_zero_evex(vecY dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vpxor   $dst k0,$dst,$dst\t! replicate8F zero" %}
  ins_encode %{
    // Use vpxor instead of vxorps: 512-bit vxorps requires AVX512DQ,
    // while vpxor only needs AVX512F.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}
replicate16F zero" %} 4718 ins_encode %{ 4719 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4720 int vector_len = 2; 4721 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4722 %} 4723 ins_pipe( fpu_reg_reg ); 4724 %} 4725 4726 instruct Repl4D_evex(vecY dst, regD src) %{ 4727 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4728 match(Set dst (ReplicateD src)); 4729 format %{ "vbroadcastsd $dst,$src\t! replicate4D" %} 4730 ins_encode %{ 4731 int vector_len = 1; 4732 __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4733 %} 4734 ins_pipe( pipe_slow ); 4735 %} 4736 4737 instruct Repl4D_mem_evex(vecY dst, memory mem) %{ 4738 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4739 match(Set dst (ReplicateD (LoadD mem))); 4740 format %{ "vbroadcastsd $dst,$mem\t! replicate4D" %} 4741 ins_encode %{ 4742 int vector_len = 1; 4743 __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); 4744 %} 4745 ins_pipe( pipe_slow ); 4746 %} 4747 4748 instruct Repl8D_evex(vecZ dst, regD src) %{ 4749 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4750 match(Set dst (ReplicateD src)); 4751 format %{ "vbroadcastsd $dst,$src\t! replicate8D" %} 4752 ins_encode %{ 4753 int vector_len = 2; 4754 __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4755 %} 4756 ins_pipe( pipe_slow ); 4757 %} 4758 4759 instruct Repl8D_mem_evex(vecZ dst, memory mem) %{ 4760 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4761 match(Set dst (ReplicateD (LoadD mem))); 4762 format %{ "vbroadcastsd $dst,$mem\t! replicate8D" %} 4763 ins_encode %{ 4764 int vector_len = 2; 4765 __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); 4766 %} 4767 ins_pipe( pipe_slow ); 4768 %} 4769 4770 instruct Repl2D_zero_evex(vecX dst, immD0 zero) %{ 4771 predicate(n->as_Vector()->length() == 2 && UseAVX > 2); 4772 match(Set dst (ReplicateD zero)); 4773 format %{ "vpxor $dst k0,$dst,$dst\t! replicate2D zero" %} 4774 ins_encode %{ 4775 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4776 int vector_len = 2; 4777 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4778 %} 4779 ins_pipe( fpu_reg_reg ); 4780 %} 4781 4782 instruct Repl4D_zero_evex(vecY dst, immD0 zero) %{ 4783 predicate(n->as_Vector()->length() == 4 && UseAVX > 2); 4784 match(Set dst (ReplicateD zero)); 4785 format %{ "vpxor $dst k0,$dst,$dst\t! replicate4D zero" %} 4786 ins_encode %{ 4787 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4788 int vector_len = 2; 4789 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4790 %} 4791 ins_pipe( fpu_reg_reg ); 4792 %} 4793 4794 instruct Repl8D_zero_evex(vecZ dst, immD0 zero) %{ 4795 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4796 match(Set dst (ReplicateD zero)); 4797 format %{ "vpxor $dst k0,$dst,$dst,vect512\t! 
replicate8D zero" %} 4798 ins_encode %{ 4799 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4800 int vector_len = 2; 4801 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4802 %} 4803 ins_pipe( fpu_reg_reg ); 4804 %} 4805 4806 // ====================REDUCTION ARITHMETIC======================================= 4807 4808 instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4809 predicate(UseSSE > 2 && UseAVX == 0); 4810 match(Set dst (AddReductionVI src1 src2)); 4811 effect(TEMP tmp2, TEMP tmp); 4812 format %{ "movdqu $tmp2,$src2\n\t" 4813 "phaddd $tmp2,$tmp2\n\t" 4814 "movd $tmp,$src1\n\t" 4815 "paddd $tmp,$tmp2\n\t" 4816 "movd $dst,$tmp\t! add reduction2I" %} 4817 ins_encode %{ 4818 __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister); 4819 __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); 4820 __ movdl($tmp$$XMMRegister, $src1$$Register); 4821 __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister); 4822 __ movdl($dst$$Register, $tmp$$XMMRegister); 4823 %} 4824 ins_pipe( pipe_slow ); 4825 %} 4826 4827 instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4828 predicate(VM_Version::supports_avxonly()); 4829 match(Set dst (AddReductionVI src1 src2)); 4830 effect(TEMP tmp, TEMP tmp2); 4831 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4832 "movd $tmp2,$src1\n\t" 4833 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4834 "movd $dst,$tmp2\t! add reduction2I" %} 4835 ins_encode %{ 4836 int vector_len = 0; 4837 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4838 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4839 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 4840 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4841 %} 4842 ins_pipe( pipe_slow ); 4843 %} 4844 4845 instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4846 predicate(UseAVX > 2); 4847 match(Set dst (AddReductionVI src1 src2)); 4848 effect(TEMP tmp, TEMP tmp2); 4849 format %{ "pshufd $tmp2,$src2,0x1\n\t" 4850 "vpaddd $tmp,$src2,$tmp2\n\t" 4851 "movd $tmp2,$src1\n\t" 4852 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4853 "movd $dst,$tmp2\t! add reduction2I" %} 4854 ins_encode %{ 4855 int vector_len = 0; 4856 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 4857 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4858 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4859 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4860 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4861 %} 4862 ins_pipe( pipe_slow ); 4863 %} 4864 4865 instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4866 predicate(UseSSE > 2 && UseAVX == 0); 4867 match(Set dst (AddReductionVI src1 src2)); 4868 effect(TEMP tmp, TEMP tmp2); 4869 format %{ "movdqu $tmp,$src2\n\t" 4870 "phaddd $tmp,$tmp\n\t" 4871 "phaddd $tmp,$tmp\n\t" 4872 "movd $tmp2,$src1\n\t" 4873 "paddd $tmp2,$tmp\n\t" 4874 "movd $dst,$tmp2\t! 
add reduction4I" %} 4875 ins_encode %{ 4876 __ movdqu($tmp$$XMMRegister, $src2$$XMMRegister); 4877 __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); 4878 __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); 4879 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4880 __ paddd($tmp2$$XMMRegister, $tmp$$XMMRegister); 4881 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4882 %} 4883 ins_pipe( pipe_slow ); 4884 %} 4885 4886 instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4887 predicate(VM_Version::supports_avxonly()); 4888 match(Set dst (AddReductionVI src1 src2)); 4889 effect(TEMP tmp, TEMP tmp2); 4890 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4891 "vphaddd $tmp,$tmp,$tmp\n\t" 4892 "movd $tmp2,$src1\n\t" 4893 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4894 "movd $dst,$tmp2\t! add reduction4I" %} 4895 ins_encode %{ 4896 int vector_len = 0; 4897 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4898 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 4899 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4900 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 4901 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4902 %} 4903 ins_pipe( pipe_slow ); 4904 %} 4905 4906 instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4907 predicate(UseAVX > 2); 4908 match(Set dst (AddReductionVI src1 src2)); 4909 effect(TEMP tmp, TEMP tmp2); 4910 format %{ "pshufd $tmp2,$src2,0xE\n\t" 4911 "vpaddd $tmp,$src2,$tmp2\n\t" 4912 "pshufd $tmp2,$tmp,0x1\n\t" 4913 "vpaddd $tmp,$tmp,$tmp2\n\t" 4914 "movd $tmp2,$src1\n\t" 4915 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4916 "movd $dst,$tmp2\t! add reduction4I" %} 4917 ins_encode %{ 4918 int vector_len = 0; 4919 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 4920 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4921 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4922 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4923 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4924 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4925 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4926 %} 4927 ins_pipe( pipe_slow ); 4928 %} 4929 4930 instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 4931 predicate(VM_Version::supports_avxonly()); 4932 match(Set dst (AddReductionVI src1 src2)); 4933 effect(TEMP tmp, TEMP tmp2); 4934 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4935 "vphaddd $tmp,$tmp,$tmp2\n\t" 4936 "vextracti128_high $tmp2,$tmp\n\t" 4937 "vpaddd $tmp,$tmp,$tmp2\n\t" 4938 "movd $tmp2,$src1\n\t" 4939 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4940 "movd $dst,$tmp2\t! 
add reduction8I" %} 4941 ins_encode %{ 4942 int vector_len = 1; 4943 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4944 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4945 __ vextracti128_high($tmp2$$XMMRegister, $tmp$$XMMRegister); 4946 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4947 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4948 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4949 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4950 %} 4951 ins_pipe( pipe_slow ); 4952 %} 4953 4954 instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 4955 predicate(UseAVX > 2); 4956 match(Set dst (AddReductionVI src1 src2)); 4957 effect(TEMP tmp, TEMP tmp2); 4958 format %{ "vextracti128_high $tmp,$src2\n\t" 4959 "vpaddd $tmp,$tmp,$src2\n\t" 4960 "pshufd $tmp2,$tmp,0xE\n\t" 4961 "vpaddd $tmp,$tmp,$tmp2\n\t" 4962 "pshufd $tmp2,$tmp,0x1\n\t" 4963 "vpaddd $tmp,$tmp,$tmp2\n\t" 4964 "movd $tmp2,$src1\n\t" 4965 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4966 "movd $dst,$tmp2\t! add reduction8I" %} 4967 ins_encode %{ 4968 int vector_len = 0; 4969 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 4970 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 4971 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 4972 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4973 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4974 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4975 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4976 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4977 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4978 %} 4979 ins_pipe( pipe_slow ); 4980 %} 4981 4982 instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 4983 predicate(UseAVX > 2); 4984 match(Set dst (AddReductionVI src1 src2)); 4985 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 4986 format %{ "vextracti64x4_high $tmp3,$src2\n\t" 4987 "vpaddd $tmp3,$tmp3,$src2\n\t" 4988 "vextracti128_high $tmp,$tmp3\n\t" 4989 "vpaddd $tmp,$tmp,$tmp3\n\t" 4990 "pshufd $tmp2,$tmp,0xE\n\t" 4991 "vpaddd $tmp,$tmp,$tmp2\n\t" 4992 "pshufd $tmp2,$tmp,0x1\n\t" 4993 "vpaddd $tmp,$tmp,$tmp2\n\t" 4994 "movd $tmp2,$src1\n\t" 4995 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4996 "movd $dst,$tmp2\t! 
mul reduction16I" %} 4997 ins_encode %{ 4998 __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); 4999 __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 5000 __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); 5001 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 5002 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5003 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5004 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5005 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5006 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5007 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5008 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5009 %} 5010 ins_pipe( pipe_slow ); 5011 %} 5012 5013 #ifdef _LP64 5014 instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ 5015 predicate(UseAVX > 2); 5016 match(Set dst (AddReductionVL src1 src2)); 5017 effect(TEMP tmp, TEMP tmp2); 5018 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5019 "vpaddq $tmp,$src2,$tmp2\n\t" 5020 "movdq $tmp2,$src1\n\t" 5021 "vpaddq $tmp2,$tmp,$tmp2\n\t" 5022 "movdq $dst,$tmp2\t! add reduction2L" %} 5023 ins_encode %{ 5024 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5025 __ vpaddq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 5026 __ movdq($tmp2$$XMMRegister, $src1$$Register); 5027 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5028 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5029 %} 5030 ins_pipe( pipe_slow ); 5031 %} 5032 5033 instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ 5034 predicate(UseAVX > 2); 5035 match(Set dst (AddReductionVL src1 src2)); 5036 effect(TEMP tmp, TEMP tmp2); 5037 format %{ "vextracti128_high $tmp,$src2\n\t" 5038 "vpaddq $tmp2,$tmp,$src2\n\t" 5039 "pshufd $tmp,$tmp2,0xE\n\t" 5040 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5041 "movdq $tmp,$src1\n\t" 5042 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5043 "movdq $dst,$tmp2\t! add reduction4L" %} 5044 ins_encode %{ 5045 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5046 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 5047 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5048 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5049 __ movdq($tmp$$XMMRegister, $src1$$Register); 5050 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5051 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5052 %} 5053 ins_pipe( pipe_slow ); 5054 %} 5055 5056 instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ 5057 predicate(UseAVX > 2); 5058 match(Set dst (AddReductionVL src1 src2)); 5059 effect(TEMP tmp, TEMP tmp2); 5060 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 5061 "vpaddq $tmp2,$tmp2,$src2\n\t" 5062 "vextracti128_high $tmp,$tmp2\n\t" 5063 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5064 "pshufd $tmp,$tmp2,0xE\n\t" 5065 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5066 "movdq $tmp,$src1\n\t" 5067 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5068 "movdq $dst,$tmp2\t! 
add reduction8L" %} 5069 ins_encode %{ 5070 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5071 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 5072 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 5073 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5074 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5075 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5076 __ movdq($tmp$$XMMRegister, $src1$$Register); 5077 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5078 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5079 %} 5080 ins_pipe( pipe_slow ); 5081 %} 5082 #endif 5083 5084 instruct rsadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 5085 predicate(UseSSE >= 1 && UseAVX == 0); 5086 match(Set dst (AddReductionVF dst src2)); 5087 effect(TEMP dst, TEMP tmp); 5088 format %{ "addss $dst,$src2\n\t" 5089 "pshufd $tmp,$src2,0x01\n\t" 5090 "addss $dst,$tmp\t! add reduction2F" %} 5091 ins_encode %{ 5092 __ addss($dst$$XMMRegister, $src2$$XMMRegister); 5093 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5094 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 5095 %} 5096 ins_pipe( pipe_slow ); 5097 %} 5098 5099 instruct rvadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 5100 predicate(UseAVX > 0); 5101 match(Set dst (AddReductionVF dst src2)); 5102 effect(TEMP dst, TEMP tmp); 5103 format %{ "vaddss $dst,$dst,$src2\n\t" 5104 "pshufd $tmp,$src2,0x01\n\t" 5105 "vaddss $dst,$dst,$tmp\t! add reduction2F" %} 5106 ins_encode %{ 5107 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5108 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5109 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5110 %} 5111 ins_pipe( pipe_slow ); 5112 %} 5113 5114 instruct rsadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 5115 predicate(UseSSE >= 1 && UseAVX == 0); 5116 match(Set dst (AddReductionVF dst src2)); 5117 effect(TEMP dst, TEMP tmp); 5118 format %{ "addss $dst,$src2\n\t" 5119 "pshufd $tmp,$src2,0x01\n\t" 5120 "addss $dst,$tmp\n\t" 5121 "pshufd $tmp,$src2,0x02\n\t" 5122 "addss $dst,$tmp\n\t" 5123 "pshufd $tmp,$src2,0x03\n\t" 5124 "addss $dst,$tmp\t! add reduction4F" %} 5125 ins_encode %{ 5126 __ addss($dst$$XMMRegister, $src2$$XMMRegister); 5127 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5128 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 5129 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5130 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 5131 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5132 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 5133 %} 5134 ins_pipe( pipe_slow ); 5135 %} 5136 5137 instruct rvadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 5138 predicate(UseAVX > 0); 5139 match(Set dst (AddReductionVF dst src2)); 5140 effect(TEMP tmp, TEMP dst); 5141 format %{ "vaddss $dst,dst,$src2\n\t" 5142 "pshufd $tmp,$src2,0x01\n\t" 5143 "vaddss $dst,$dst,$tmp\n\t" 5144 "pshufd $tmp,$src2,0x02\n\t" 5145 "vaddss $dst,$dst,$tmp\n\t" 5146 "pshufd $tmp,$src2,0x03\n\t" 5147 "vaddss $dst,$dst,$tmp\t! 
add reduction4F" %} 5148 ins_encode %{ 5149 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5150 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5151 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5152 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5153 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5154 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5155 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5156 %} 5157 ins_pipe( pipe_slow ); 5158 %} 5159 5160 instruct radd8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{ 5161 predicate(UseAVX > 0); 5162 match(Set dst (AddReductionVF dst src2)); 5163 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5164 format %{ "vaddss $dst,$dst,$src2\n\t" 5165 "pshufd $tmp,$src2,0x01\n\t" 5166 "vaddss $dst,$dst,$tmp\n\t" 5167 "pshufd $tmp,$src2,0x02\n\t" 5168 "vaddss $dst,$dst,$tmp\n\t" 5169 "pshufd $tmp,$src2,0x03\n\t" 5170 "vaddss $dst,$dst,$tmp\n\t" 5171 "vextractf128_high $tmp2,$src2\n\t" 5172 "vaddss $dst,$dst,$tmp2\n\t" 5173 "pshufd $tmp,$tmp2,0x01\n\t" 5174 "vaddss $dst,$dst,$tmp\n\t" 5175 "pshufd $tmp,$tmp2,0x02\n\t" 5176 "vaddss $dst,$dst,$tmp\n\t" 5177 "pshufd $tmp,$tmp2,0x03\n\t" 5178 "vaddss $dst,$dst,$tmp\t! add reduction8F" %} 5179 ins_encode %{ 5180 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5181 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5182 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5183 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5184 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5185 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5186 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5187 __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5188 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5189 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5190 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5191 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5192 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5193 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5194 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5195 %} 5196 ins_pipe( pipe_slow ); 5197 %} 5198 5199 instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{ 5200 predicate(UseAVX > 2); 5201 match(Set dst (AddReductionVF dst src2)); 5202 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5203 format %{ "vaddss $dst,$dst,$src2\n\t" 5204 "pshufd $tmp,$src2,0x01\n\t" 5205 "vaddss $dst,$dst,$tmp\n\t" 5206 "pshufd $tmp,$src2,0x02\n\t" 5207 "vaddss $dst,$dst,$tmp\n\t" 5208 "pshufd $tmp,$src2,0x03\n\t" 5209 "vaddss $dst,$dst,$tmp\n\t" 5210 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5211 "vaddss $dst,$dst,$tmp2\n\t" 5212 "pshufd $tmp,$tmp2,0x01\n\t" 5213 "vaddss $dst,$dst,$tmp\n\t" 5214 "pshufd $tmp,$tmp2,0x02\n\t" 5215 "vaddss $dst,$dst,$tmp\n\t" 5216 "pshufd $tmp,$tmp2,0x03\n\t" 5217 "vaddss $dst,$dst,$tmp\n\t" 5218 "vextractf32x4 $tmp2,$src2,0x2\n\t" 5219 "vaddss $dst,$dst,$tmp2\n\t" 5220 "pshufd $tmp,$tmp2,0x01\n\t" 5221 "vaddss $dst,$dst,$tmp\n\t" 5222 "pshufd $tmp,$tmp2,0x02\n\t" 5223 "vaddss $dst,$dst,$tmp\n\t" 5224 "pshufd $tmp,$tmp2,0x03\n\t" 5225 "vaddss $dst,$dst,$tmp\n\t" 5226 "vextractf32x4 $tmp2,$src2,0x3\n\t" 5227 "vaddss $dst,$dst,$tmp2\n\t" 5228 "pshufd $tmp,$tmp2,0x01\n\t" 5229 "vaddss $dst,$dst,$tmp\n\t" 5230 "pshufd 
instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVF dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vaddss  $dst,$dst,$src2\n\t"
            "pshufd  $tmp,$src2,0x01\n\t"
            "vaddss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$src2,0x02\n\t"
            "vaddss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$src2,0x03\n\t"
            "vaddss  $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x1\n\t"
            "vaddss  $dst,$dst,$tmp2\n\t"
            "pshufd  $tmp,$tmp2,0x01\n\t"
            "vaddss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x02\n\t"
            "vaddss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x03\n\t"
            "vaddss  $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x2\n\t"
            "vaddss  $dst,$dst,$tmp2\n\t"
            "pshufd  $tmp,$tmp2,0x01\n\t"
            "vaddss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x02\n\t"
            "vaddss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x03\n\t"
            "vaddss  $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x3\n\t"
            "vaddss  $dst,$dst,$tmp2\n\t"
            "pshufd  $tmp,$tmp2,0x01\n\t"
            "vaddss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x02\n\t"
            "vaddss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x03\n\t"
            "vaddss  $dst,$dst,$tmp\t! add reduction16F" %}
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (AddReductionVD dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "addsd   $dst,$src2\n\t"
            "pshufd  $tmp,$src2,0xE\n\t"
            "addsd   $dst,$tmp\t! add reduction2D" %}
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ addsd($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
add reduction2D" %} 5292 ins_encode %{ 5293 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5294 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5295 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5296 %} 5297 ins_pipe( pipe_slow ); 5298 %} 5299 5300 instruct rvadd4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{ 5301 predicate(UseAVX > 0); 5302 match(Set dst (AddReductionVD dst src2)); 5303 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5304 format %{ "vaddsd $dst,$dst,$src2\n\t" 5305 "pshufd $tmp,$src2,0xE\n\t" 5306 "vaddsd $dst,$dst,$tmp\n\t" 5307 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5308 "vaddsd $dst,$dst,$tmp2\n\t" 5309 "pshufd $tmp,$tmp2,0xE\n\t" 5310 "vaddsd $dst,$dst,$tmp\t! add reduction4D" %} 5311 ins_encode %{ 5312 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5313 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5314 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5315 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5316 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5317 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5318 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5319 %} 5320 ins_pipe( pipe_slow ); 5321 %} 5322 5323 instruct rvadd8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{ 5324 predicate(UseAVX > 2); 5325 match(Set dst (AddReductionVD dst src2)); 5326 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5327 format %{ "vaddsd $dst,$dst,$src2\n\t" 5328 "pshufd $tmp,$src2,0xE\n\t" 5329 "vaddsd $dst,$dst,$tmp\n\t" 5330 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5331 "vaddsd $dst,$dst,$tmp2\n\t" 5332 "pshufd $tmp,$tmp2,0xE\n\t" 5333 "vaddsd $dst,$dst,$tmp\n\t" 5334 "vextractf32x4 $tmp2,$src2,0x2\n\t" 5335 "vaddsd $dst,$dst,$tmp2\n\t" 5336 "pshufd $tmp,$tmp2,0xE\n\t" 5337 "vaddsd $dst,$dst,$tmp\n\t" 5338 "vextractf32x4 $tmp2,$src2,0x3\n\t" 5339 "vaddsd $dst,$dst,$tmp2\n\t" 5340 "pshufd $tmp,$tmp2,0xE\n\t" 5341 "vaddsd $dst,$dst,$tmp\t! add reduction8D" %} 5342 ins_encode %{ 5343 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5344 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5345 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5346 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5347 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5348 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5349 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5350 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 5351 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5352 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5353 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5354 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 5355 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5356 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5357 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5358 %} 5359 ins_pipe( pipe_slow ); 5360 %} 5361 5362 instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 5363 predicate(UseSSE > 3 && UseAVX == 0); 5364 match(Set dst (MulReductionVI src1 src2)); 5365 effect(TEMP tmp, TEMP tmp2); 5366 format %{ "pshufd $tmp2,$src2,0x1\n\t" 5367 "pmulld $tmp2,$src2\n\t" 5368 "movd $tmp,$src1\n\t" 5369 "pmulld $tmp2,$tmp\n\t" 5370 "movd $dst,$tmp2\t! 
mul reduction2I" %} 5371 ins_encode %{ 5372 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5373 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 5374 __ movdl($tmp$$XMMRegister, $src1$$Register); 5375 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5376 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5377 %} 5378 ins_pipe( pipe_slow ); 5379 %} 5380 5381 instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 5382 predicate(UseAVX > 0); 5383 match(Set dst (MulReductionVI src1 src2)); 5384 effect(TEMP tmp, TEMP tmp2); 5385 format %{ "pshufd $tmp2,$src2,0x1\n\t" 5386 "vpmulld $tmp,$src2,$tmp2\n\t" 5387 "movd $tmp2,$src1\n\t" 5388 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5389 "movd $dst,$tmp2\t! mul reduction2I" %} 5390 ins_encode %{ 5391 int vector_len = 0; 5392 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5393 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5394 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5395 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5396 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5397 %} 5398 ins_pipe( pipe_slow ); 5399 %} 5400 5401 instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 5402 predicate(UseSSE > 3 && UseAVX == 0); 5403 match(Set dst (MulReductionVI src1 src2)); 5404 effect(TEMP tmp, TEMP tmp2); 5405 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5406 "pmulld $tmp2,$src2\n\t" 5407 "pshufd $tmp,$tmp2,0x1\n\t" 5408 "pmulld $tmp2,$tmp\n\t" 5409 "movd $tmp,$src1\n\t" 5410 "pmulld $tmp2,$tmp\n\t" 5411 "movd $dst,$tmp2\t! mul reduction4I" %} 5412 ins_encode %{ 5413 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5414 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 5415 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1); 5416 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5417 __ movdl($tmp$$XMMRegister, $src1$$Register); 5418 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5419 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5420 %} 5421 ins_pipe( pipe_slow ); 5422 %} 5423 5424 instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 5425 predicate(UseAVX > 0); 5426 match(Set dst (MulReductionVI src1 src2)); 5427 effect(TEMP tmp, TEMP tmp2); 5428 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5429 "vpmulld $tmp,$src2,$tmp2\n\t" 5430 "pshufd $tmp2,$tmp,0x1\n\t" 5431 "vpmulld $tmp,$tmp,$tmp2\n\t" 5432 "movd $tmp2,$src1\n\t" 5433 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5434 "movd $dst,$tmp2\t! 
mul reduction4I" %} 5435 ins_encode %{ 5436 int vector_len = 0; 5437 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5438 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5439 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5440 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5441 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5442 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5443 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5444 %} 5445 ins_pipe( pipe_slow ); 5446 %} 5447 5448 instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 5449 predicate(UseAVX > 0); 5450 match(Set dst (MulReductionVI src1 src2)); 5451 effect(TEMP tmp, TEMP tmp2); 5452 format %{ "vextracti128_high $tmp,$src2\n\t" 5453 "vpmulld $tmp,$tmp,$src2\n\t" 5454 "pshufd $tmp2,$tmp,0xE\n\t" 5455 "vpmulld $tmp,$tmp,$tmp2\n\t" 5456 "pshufd $tmp2,$tmp,0x1\n\t" 5457 "vpmulld $tmp,$tmp,$tmp2\n\t" 5458 "movd $tmp2,$src1\n\t" 5459 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5460 "movd $dst,$tmp2\t! mul reduction8I" %} 5461 ins_encode %{ 5462 int vector_len = 0; 5463 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5464 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 5465 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5466 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5467 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5468 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5469 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5470 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5471 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5472 %} 5473 ins_pipe( pipe_slow ); 5474 %} 5475 5476 instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 5477 predicate(UseAVX > 2); 5478 match(Set dst (MulReductionVI src1 src2)); 5479 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5480 format %{ "vextracti64x4_high $tmp3,$src2\n\t" 5481 "vpmulld $tmp3,$tmp3,$src2\n\t" 5482 "vextracti128_high $tmp,$tmp3\n\t" 5483 "vpmulld $tmp,$tmp,$src2\n\t" 5484 "pshufd $tmp2,$tmp,0xE\n\t" 5485 "vpmulld $tmp,$tmp,$tmp2\n\t" 5486 "pshufd $tmp2,$tmp,0x1\n\t" 5487 "vpmulld $tmp,$tmp,$tmp2\n\t" 5488 "movd $tmp2,$src1\n\t" 5489 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5490 "movd $dst,$tmp2\t! 
mul reduction16I" %} 5491 ins_encode %{ 5492 __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); 5493 __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 5494 __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); 5495 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 5496 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5497 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5498 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5499 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5500 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5501 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5502 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5503 %} 5504 ins_pipe( pipe_slow ); 5505 %} 5506 5507 #ifdef _LP64 5508 instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ 5509 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5510 match(Set dst (MulReductionVL src1 src2)); 5511 effect(TEMP tmp, TEMP tmp2); 5512 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5513 "vpmullq $tmp,$src2,$tmp2\n\t" 5514 "movdq $tmp2,$src1\n\t" 5515 "vpmullq $tmp2,$tmp,$tmp2\n\t" 5516 "movdq $dst,$tmp2\t! mul reduction2L" %} 5517 ins_encode %{ 5518 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5519 __ vpmullq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 5520 __ movdq($tmp2$$XMMRegister, $src1$$Register); 5521 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5522 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5523 %} 5524 ins_pipe( pipe_slow ); 5525 %} 5526 5527 instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ 5528 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5529 match(Set dst (MulReductionVL src1 src2)); 5530 effect(TEMP tmp, TEMP tmp2); 5531 format %{ "vextracti128_high $tmp,$src2\n\t" 5532 "vpmullq $tmp2,$tmp,$src2\n\t" 5533 "pshufd $tmp,$tmp2,0xE\n\t" 5534 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5535 "movdq $tmp,$src1\n\t" 5536 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5537 "movdq $dst,$tmp2\t! mul reduction4L" %} 5538 ins_encode %{ 5539 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5540 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 5541 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5542 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5543 __ movdq($tmp$$XMMRegister, $src1$$Register); 5544 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5545 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5546 %} 5547 ins_pipe( pipe_slow ); 5548 %} 5549 5550 instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ 5551 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5552 match(Set dst (MulReductionVL src1 src2)); 5553 effect(TEMP tmp, TEMP tmp2); 5554 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 5555 "vpmullq $tmp2,$tmp2,$src2\n\t" 5556 "vextracti128_high $tmp,$tmp2\n\t" 5557 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5558 "pshufd $tmp,$tmp2,0xE\n\t" 5559 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5560 "movdq $tmp,$src1\n\t" 5561 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5562 "movdq $dst,$tmp2\t! 
mul reduction8L" %} 5563 ins_encode %{ 5564 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5565 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 5566 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 5567 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5568 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5569 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5570 __ movdq($tmp$$XMMRegister, $src1$$Register); 5571 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5572 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5573 %} 5574 ins_pipe( pipe_slow ); 5575 %} 5576 #endif 5577 5578 instruct rsmul2F_reduction(regF dst, vecD src2, regF tmp) %{ 5579 predicate(UseSSE >= 1 && UseAVX == 0); 5580 match(Set dst (MulReductionVF dst src2)); 5581 effect(TEMP dst, TEMP tmp); 5582 format %{ "mulss $dst,$src2\n\t" 5583 "pshufd $tmp,$src2,0x01\n\t" 5584 "mulss $dst,$tmp\t! mul reduction2F" %} 5585 ins_encode %{ 5586 __ mulss($dst$$XMMRegister, $src2$$XMMRegister); 5587 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5588 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5589 %} 5590 ins_pipe( pipe_slow ); 5591 %} 5592 5593 instruct rvmul2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 5594 predicate(UseAVX > 0); 5595 match(Set dst (MulReductionVF dst src2)); 5596 effect(TEMP tmp, TEMP dst); 5597 format %{ "vmulss $dst,$dst,$src2\n\t" 5598 "pshufd $tmp,$src2,0x01\n\t" 5599 "vmulss $dst,$dst,$tmp\t! mul reduction2F" %} 5600 ins_encode %{ 5601 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5602 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5603 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5604 %} 5605 ins_pipe( pipe_slow ); 5606 %} 5607 5608 instruct rsmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 5609 predicate(UseSSE >= 1 && UseAVX == 0); 5610 match(Set dst (MulReductionVF dst src2)); 5611 effect(TEMP dst, TEMP tmp); 5612 format %{ "mulss $dst,$src2\n\t" 5613 "pshufd $tmp,$src2,0x01\n\t" 5614 "mulss $dst,$tmp\n\t" 5615 "pshufd $tmp,$src2,0x02\n\t" 5616 "mulss $dst,$tmp\n\t" 5617 "pshufd $tmp,$src2,0x03\n\t" 5618 "mulss $dst,$tmp\t! mul reduction4F" %} 5619 ins_encode %{ 5620 __ mulss($dst$$XMMRegister, $src2$$XMMRegister); 5621 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5622 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5623 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5624 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5625 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5626 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5627 %} 5628 ins_pipe( pipe_slow ); 5629 %} 5630 5631 instruct rvmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 5632 predicate(UseAVX > 0); 5633 match(Set dst (MulReductionVF dst src2)); 5634 effect(TEMP tmp, TEMP dst); 5635 format %{ "vmulss $dst,$dst,$src2\n\t" 5636 "pshufd $tmp,$src2,0x01\n\t" 5637 "vmulss $dst,$dst,$tmp\n\t" 5638 "pshufd $tmp,$src2,0x02\n\t" 5639 "vmulss $dst,$dst,$tmp\n\t" 5640 "pshufd $tmp,$src2,0x03\n\t" 5641 "vmulss $dst,$dst,$tmp\t! 
instruct rvmul8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulss  $dst,$dst,$src2\n\t"
            "pshufd  $tmp,$src2,0x01\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$src2,0x02\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$src2,0x03\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "vextractf128_high  $tmp2,$src2\n\t"
            "vmulss  $dst,$dst,$tmp2\n\t"
            "pshufd  $tmp,$tmp2,0x01\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x02\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x03\n\t"
            "vmulss  $dst,$dst,$tmp\t! mul reduction8F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulss  $dst,$dst,$src2\n\t"
            "pshufd  $tmp,$src2,0x01\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$src2,0x02\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$src2,0x03\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "vextractf32x4  $tmp2,$src2,0x1\n\t"
            "vmulss  $dst,$dst,$tmp2\n\t"
            "pshufd  $tmp,$tmp2,0x01\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x02\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x03\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "vextractf32x4  $tmp2,$src2,0x2\n\t"
            "vmulss  $dst,$dst,$tmp2\n\t"
            "pshufd  $tmp,$tmp2,0x01\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x02\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x03\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "vextractf32x4  $tmp2,$src2,0x3\n\t"
            "vmulss  $dst,$dst,$tmp2\n\t"
            "pshufd  $tmp,$tmp2,0x01\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x02\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x03\n\t"
            "vmulss  $dst,$dst,$tmp\t! mul reduction16F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
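// Note: unlike the integer reductions, the float reductions above do not fold
// the vector in halves; they multiply the accumulator by each lane strictly in
// lane order. FP multiplication is not associative, so only the sequential
// order reproduces the scalar Java result. The rules implement exactly this
// scalar loop (C++ sketch, hypothetical helper):
//
//   float mul_reduction_vf(float dst, const float* lanes, int n) {
//     for (int i = 0; i < n; i++) {
//       dst *= lanes[i];  // strict left-to-right order, as in scalar code
//     }
//     return dst;
//   }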
instruct rsmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "mulsd   $dst,$src2\n\t"
            "pshufd  $tmp,$src2,0xE\n\t"
            "mulsd   $dst,$tmp\t! mul reduction2D" %}
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vmulsd  $dst,$dst,$src2\n\t"
            "pshufd  $tmp,$src2,0xE\n\t"
            "vmulsd  $dst,$dst,$tmp\t! mul reduction2D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
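// Note: vextractf128_high copies the upper 128-bit half of a 256-bit register,
// and vextractf32x4 with immediate i (0-3) copies 128-bit quadrant i of a
// 512-bit register, as used by the wider double reductions below. A C++ model
// of the 512-bit extract (illustrative only):
//
//   void extractf32x4_model(float dst[4], const float src[16], int imm) {
//     for (int i = 0; i < 4; i++) {
//       dst[i] = src[4 * imm + i];  // lanes 4*imm .. 4*imm+3
//     }
//   }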
instruct rvmul4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulsd  $dst,$dst,$src2\n\t"
            "pshufd  $tmp,$src2,0xE\n\t"
            "vmulsd  $dst,$dst,$tmp\n\t"
            "vextractf128_high  $tmp2,$src2\n\t"
            "vmulsd  $dst,$dst,$tmp2\n\t"
            "pshufd  $tmp,$tmp2,0xE\n\t"
            "vmulsd  $dst,$dst,$tmp\t! mul reduction4D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulsd  $dst,$dst,$src2\n\t"
            "pshufd  $tmp,$src2,0xE\n\t"
            "vmulsd  $dst,$dst,$tmp\n\t"
            "vextractf32x4  $tmp2,$src2,0x1\n\t"
            "vmulsd  $dst,$dst,$tmp2\n\t"
            "pshufd  $tmp,$tmp2,0xE\n\t"
            "vmulsd  $dst,$dst,$tmp\n\t"
            "vextractf32x4  $tmp2,$src2,0x2\n\t"
            "vmulsd  $dst,$dst,$tmp2\n\t"
            "pshufd  $tmp,$tmp2,0xE\n\t"
            "vmulsd  $dst,$dst,$tmp\n\t"
            "vextractf32x4  $tmp2,$src2,0x3\n\t"
            "vmulsd  $dst,$dst,$tmp2\n\t"
            "pshufd  $tmp,$tmp2,0xE\n\t"
            "vmulsd  $dst,$dst,$tmp\t! mul reduction8D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
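// Note: in the vector rules below, the vector_len argument of the assembler
// calls selects the encoded operation width, not a lane count: 0 encodes a
// 128-bit (XMM) operation, 1 a 256-bit (YMM) operation and 2 a 512-bit (ZMM)
// operation. For example (illustrative only):
//
//   int vector_len = 1;                      // 256-bit encoding
//   __ vpaddb(dst, src1, src2, vector_len);  // adds 32 byte lanes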
// ====================VECTOR ARITHMETIC=======================================

// --------------------------------- ADD --------------------------------------

// Bytes vector add
instruct vadd4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst src));
  format %{ "paddb   $dst,$src\t! add packed4B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb  $dst,$dst,$src2\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb  $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
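// Note: each packed size comes in three register and three memory flavors. The
// _avx rules serve AVX/AVX2-only machines, the _evex rules serve AVX-512
// machines with the BW extension (which adds EVEX byte/word forms), and the
// _evex_special rules cover AVX-512 machines without BW, where the operation
// falls back to a destructive two-operand shape (hence the "dst op dst" match
// plus a TEMP). The dispatch the predicates implement, sketched in C++
// (illustrative helper, not part of this file):
//
//   const char* pick_byte_add_rule() {
//     if (VM_Version::supports_avxonly())    return "_avx";
//     if (VM_Version::supports_avx512bw())   return "_evex";
//     if (VM_Version::supports_avx512nobw()) return "_evex_special";
//     return "_sse";  // the UseAVX == 0 rules
//   }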
instruct vadd8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst src));
  format %{ "paddb   $dst,$src\t! add packed8B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb  $dst,$dst,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb  $dst,$src,$mem\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst src));
  format %{ "paddb   $dst,$src\t! add packed16B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vadd16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb  $dst,$dst,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb  $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb  $dst,$dst,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vadd32B_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb  $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
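// Note: the vec operand classes encode the vector width: vecS is 32-bit,
// vecD 64-bit, vecX 128-bit (XMM), vecY 256-bit (YMM) and vecZ 512-bit (ZMM),
// so the byte adds above run from packed4B in vecS to packed64B in vecZ. Lane
// count is just width over element size (C++ sketch, illustrative only):
//
//   int lane_count(int vector_width_bits, int element_size_bits) {
//     return vector_width_bits / element_size_bits;  // e.g. 512 / 8 = 64
//   }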
// Shorts/Chars vector add
instruct vadd2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst src));
  format %{ "paddw   $dst,$src\t! add packed2S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw  $dst,$src1,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw  $dst,$src1,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw  $dst,$dst,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw  $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw  $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw  $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst src));
  format %{ "paddw   $dst,$src\t! add packed4S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw  $dst,$src1,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw  $dst,$src1,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw  $dst,$dst,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw  $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vadd4S_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw  $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw  $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src));
  format %{ "paddw   $dst,$src\t! add packed8S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw  $dst,$src1,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw  $dst,$src1,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw  $dst,$dst,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw  $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw  $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vadd8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw  $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw  $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw  $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw  $dst,$dst,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw  $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw  $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw  $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw  $dst,$src1,$src2\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw  $dst,$src,$mem\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector add
instruct vadd2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst src));
  format %{ "paddd   $dst,$src\t! add packed2I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd  $dst,$src1,$src2\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd  $dst,$src,$mem\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst src));
  format %{ "paddd   $dst,$src\t! add packed4I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd  $dst,$src1,$src2\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd  $dst,$src,$mem\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd  $dst,$src1,$src2\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd  $dst,$src,$mem\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd  $dst,$src1,$src2\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd  $dst,$src,$mem\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
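// Note: for the integer rules the UseAVX thresholds track the ISA level that
// introduced each width: UseAVX > 0 (AVX) for the 128-bit VEX forms,
// UseAVX > 1 (AVX2, the first with 256-bit integer ops) for the YMM forms,
// and UseAVX > 2 (AVX-512) for the ZMM forms. C++ sketch (illustrative only):
//
//   int max_int_vector_bits(int use_avx) {
//     if (use_avx > 2) return 512;  // AVX-512F: vpaddd/vpaddq zmm
//     if (use_avx > 1) return 256;  // AVX2: vpaddd/vpaddq ymm
//     return 128;                   // AVX/SSE2: xmm only
//   }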
// Longs vector add
instruct vadd2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL dst src));
  format %{ "paddq   $dst,$src\t! add packed2L" %}
  ins_encode %{
    __ paddq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq  $dst,$src1,$src2\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq  $dst,$src,$mem\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq  $dst,$src1,$src2\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq  $dst,$src,$mem\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq  $dst,$src1,$src2\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq  $dst,$src,$mem\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
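// Note: the _mem rules match a LoadVector feeding the op and fold the load
// into the instruction's memory operand, so no separate vector load or extra
// register is needed. The matched ideal subtree, roughly:
//
//   // (Set dst (AddVL src (LoadVector mem)))  ==>  vpaddq dst, src, [mem]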
// Floats vector add
instruct vadd2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF dst src));
  format %{ "addps   $dst,$src\t! add packed2F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps  $dst,$src1,$src2\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps  $dst,$src,$mem\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF dst src));
  format %{ "addps   $dst,$src\t! add packed4F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps  $dst,$src1,$src2\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps  $dst,$src,$mem\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps  $dst,$src1,$src2\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps  $dst,$src,$mem\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps  $dst,$src1,$src2\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps  $dst,$src,$mem\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector add
instruct vadd2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD dst src));
  format %{ "addpd   $dst,$src\t! add packed2D" %}
  ins_encode %{
    __ addpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd  $dst,$src1,$src2\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd  $dst,$src,$mem\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd  $dst,$src1,$src2\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd  $dst,$src,$mem\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd  $dst,$src1,$src2\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd  $dst,$src,$mem\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
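// Note: the FP add rules need only UseAVX > 0 for their 256-bit forms because
// AVX1 already provides 256-bit FP arithmetic (vaddps/vaddpd on YMM); only the
// 512-bit forms require UseAVX > 2. That is why vadd8F_reg above is gated on
// UseAVX > 0 while the integer vadd8I_reg required UseAVX > 1. C++ sketch
// (illustrative only):
//
//   int max_fp_vector_bits(int use_avx) {
//     if (use_avx > 2) return 512;  // AVX-512F: vaddps/vaddpd zmm
//     if (use_avx > 0) return 256;  // AVX1: vaddps/vaddpd ymm
//     return 128;                   // SSE: addps/addpd xmm
//   }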
// --------------------------------- SUB --------------------------------------

// Bytes vector sub
instruct vsub4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src));
  format %{ "psubb   $dst,$src\t! sub packed4B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb  $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb  $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb  $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
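// Note: packed byte add/sub (paddb/psubb and the VEX/EVEX forms) wrap modulo
// 2^8 per lane with no saturation, which matches the truncation Java applies
// when narrowing byte arithmetic. One lane, as a C++ sketch (illustrative):
//
//   jbyte sub_lane(jbyte a, jbyte b) {
//     return (jbyte)(a - b);  // wraps mod 256, exactly like one psubb lane
//   }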
instruct vsub8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src));
  format %{ "psubb   $dst,$src\t! sub packed8B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb  $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb  $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb  $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst src));
  format %{ "psubb   $dst,$src\t! sub packed16B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsub16B_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb  $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb  $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb  $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
sub packed32B" %} 7146 ins_encode %{ 7147 int vector_len = 1; 7148 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7149 %} 7150 ins_pipe( pipe_slow ); 7151 %} 7152 7153 instruct vsub32B_mem_avx(vecY dst, vecY src, memory mem) %{ 7154 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32); 7155 match(Set dst (SubVB src (LoadVector mem))); 7156 format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %} 7157 ins_encode %{ 7158 int vector_len = 1; 7159 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7160 %} 7161 ins_pipe( pipe_slow ); 7162 %} 7163 7164 instruct vsub32B_mem_evex(vecY dst, vecY src, memory mem) %{ 7165 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 7166 match(Set dst (SubVB src (LoadVector mem))); 7167 format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %} 7168 ins_encode %{ 7169 int vector_len = 1; 7170 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7171 %} 7172 ins_pipe( pipe_slow ); 7173 %} 7174 7175 instruct vsub32B_mem_evex_special(vecY dst, vecY src, memory mem) %{ 7176 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32); 7177 match(Set dst (SubVB dst (LoadVector mem))); 7178 effect(TEMP src); 7179 format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %} 7180 ins_encode %{ 7181 int vector_len = 1; 7182 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7183 %} 7184 ins_pipe( pipe_slow ); 7185 %} 7186 7187 instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7188 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64); 7189 match(Set dst (SubVB src1 src2)); 7190 format %{ "vpsubb $dst,$src1,$src2\t! sub packed64B" %} 7191 ins_encode %{ 7192 int vector_len = 2; 7193 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7194 %} 7195 ins_pipe( pipe_slow ); 7196 %} 7197 7198 instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{ 7199 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64); 7200 match(Set dst (SubVB src (LoadVector mem))); 7201 format %{ "vpsubb $dst,$src,$mem\t! sub packed64B" %} 7202 ins_encode %{ 7203 int vector_len = 2; 7204 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7205 %} 7206 ins_pipe( pipe_slow ); 7207 %} 7208 7209 // Shorts/Chars vector sub 7210 instruct vsub2S(vecS dst, vecS src) %{ 7211 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 7212 match(Set dst (SubVS dst src)); 7213 format %{ "psubw $dst,$src\t! sub packed2S" %} 7214 ins_encode %{ 7215 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 7216 %} 7217 ins_pipe( pipe_slow ); 7218 %} 7219 7220 instruct vsub2S_reg_avx(vecS dst, vecS src1, vecS src2) %{ 7221 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 7222 match(Set dst (SubVS src1 src2)); 7223 format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %} 7224 ins_encode %{ 7225 int vector_len = 0; 7226 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7227 %} 7228 ins_pipe( pipe_slow ); 7229 %} 7230 7231 instruct vsub2S_reg_evex(vecS dst, vecS src1, vecS src2) %{ 7232 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 7233 match(Set dst (SubVS src1 src2)); 7234 format %{ "vpsubw $dst,$src1,$src2\t! 
sub packed2S" %} 7235 ins_encode %{ 7236 int vector_len = 0; 7237 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7238 %} 7239 ins_pipe( pipe_slow ); 7240 %} 7241 7242 instruct vsub2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{ 7243 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 7244 match(Set dst (SubVS dst src2)); 7245 effect(TEMP src1); 7246 format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %} 7247 ins_encode %{ 7248 int vector_len = 0; 7249 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7250 %} 7251 ins_pipe( pipe_slow ); 7252 %} 7253 7254 instruct vsub2S_mem_avx(vecS dst, vecS src, memory mem) %{ 7255 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 7256 match(Set dst (SubVS src (LoadVector mem))); 7257 format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %} 7258 ins_encode %{ 7259 int vector_len = 0; 7260 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7261 %} 7262 ins_pipe( pipe_slow ); 7263 %} 7264 7265 instruct vsub2S_mem_evex(vecS dst, vecS src, memory mem) %{ 7266 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 7267 match(Set dst (SubVS src (LoadVector mem))); 7268 format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %} 7269 ins_encode %{ 7270 int vector_len = 0; 7271 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7272 %} 7273 ins_pipe( pipe_slow ); 7274 %} 7275 7276 instruct vsub2S_mem_evex_special(vecS dst, vecS src, memory mem) %{ 7277 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 7278 match(Set dst (SubVS dst (LoadVector mem))); 7279 effect(TEMP src); 7280 format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %} 7281 ins_encode %{ 7282 int vector_len = 0; 7283 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7284 %} 7285 ins_pipe( pipe_slow ); 7286 %} 7287 7288 instruct vsub4S(vecD dst, vecD src) %{ 7289 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 7290 match(Set dst (SubVS dst src)); 7291 format %{ "psubw $dst,$src\t! sub packed4S" %} 7292 ins_encode %{ 7293 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 7294 %} 7295 ins_pipe( pipe_slow ); 7296 %} 7297 7298 instruct vsub4S_reg_avx(vecD dst, vecD src1, vecD src2) %{ 7299 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 7300 match(Set dst (SubVS src1 src2)); 7301 format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %} 7302 ins_encode %{ 7303 int vector_len = 0; 7304 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7305 %} 7306 ins_pipe( pipe_slow ); 7307 %} 7308 7309 instruct vsub4S_reg_evex(vecD dst, vecD src1, vecD src2) %{ 7310 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 7311 match(Set dst (SubVS src1 src2)); 7312 format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %} 7313 ins_encode %{ 7314 int vector_len = 0; 7315 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7316 %} 7317 ins_pipe( pipe_slow ); 7318 %} 7319 7320 instruct vsub4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ 7321 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 7322 match(Set dst (SubVS dst src2)); 7323 effect(TEMP src1); 7324 format %{ "vpsubw $dst,$src1,$src2\t! 
sub packed4S" %} 7325 ins_encode %{ 7326 int vector_len = 0; 7327 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7328 %} 7329 ins_pipe( pipe_slow ); 7330 %} 7331 7332 instruct vsub4S_mem_avx(vecD dst, vecD src, memory mem) %{ 7333 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 7334 match(Set dst (SubVS src (LoadVector mem))); 7335 format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %} 7336 ins_encode %{ 7337 int vector_len = 0; 7338 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7339 %} 7340 ins_pipe( pipe_slow ); 7341 %} 7342 7343 instruct vsub4S_mem_evex(vecD dst, vecD src, memory mem) %{ 7344 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 7345 match(Set dst (SubVS src (LoadVector mem))); 7346 format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %} 7347 ins_encode %{ 7348 int vector_len = 0; 7349 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7350 %} 7351 ins_pipe( pipe_slow ); 7352 %} 7353 7354 instruct vsub4S_mem_evex_special(vecD dst, vecD src, memory mem) %{ 7355 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 7356 match(Set dst (SubVS dst (LoadVector mem))); 7357 effect(TEMP src); 7358 format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %} 7359 ins_encode %{ 7360 int vector_len = 0; 7361 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7362 %} 7363 ins_pipe( pipe_slow ); 7364 %} 7365 7366 instruct vsub8S(vecX dst, vecX src) %{ 7367 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 7368 match(Set dst (SubVS dst src)); 7369 format %{ "psubw $dst,$src\t! sub packed8S" %} 7370 ins_encode %{ 7371 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 7372 %} 7373 ins_pipe( pipe_slow ); 7374 %} 7375 7376 instruct vsub8S_reg_avx(vecX dst, vecX src1, vecX src2) %{ 7377 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 7378 match(Set dst (SubVS src1 src2)); 7379 format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} 7380 ins_encode %{ 7381 int vector_len = 0; 7382 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7383 %} 7384 ins_pipe( pipe_slow ); 7385 %} 7386 7387 instruct vsub8S_reg_evex(vecX dst, vecX src1, vecX src2) %{ 7388 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 7389 match(Set dst (SubVS src1 src2)); 7390 format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} 7391 ins_encode %{ 7392 int vector_len = 0; 7393 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7394 %} 7395 ins_pipe( pipe_slow ); 7396 %} 7397 7398 instruct vsub8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ 7399 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 7400 match(Set dst (SubVS dst src2)); 7401 effect(TEMP src1); 7402 format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} 7403 ins_encode %{ 7404 int vector_len = 0; 7405 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7406 %} 7407 ins_pipe( pipe_slow ); 7408 %} 7409 7410 instruct vsub8S_mem_avx(vecX dst, vecX src, memory mem) %{ 7411 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 7412 match(Set dst (SubVS src (LoadVector mem))); 7413 format %{ "vpsubw $dst,$src,$mem\t! 
sub packed8S" %} 7414 ins_encode %{ 7415 int vector_len = 0; 7416 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7417 %} 7418 ins_pipe( pipe_slow ); 7419 %} 7420 7421 instruct vsub8S_mem_evex(vecX dst, vecX src, memory mem) %{ 7422 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 7423 match(Set dst (SubVS src (LoadVector mem))); 7424 format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %} 7425 ins_encode %{ 7426 int vector_len = 0; 7427 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7428 %} 7429 ins_pipe( pipe_slow ); 7430 %} 7431 7432 instruct vsub8S_mem_evex_special(vecX dst, vecX src, memory mem) %{ 7433 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 7434 match(Set dst (SubVS dst (LoadVector mem))); 7435 effect(TEMP src); 7436 format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %} 7437 ins_encode %{ 7438 int vector_len = 0; 7439 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7440 %} 7441 ins_pipe( pipe_slow ); 7442 %} 7443 7444 instruct vsub16S_reg_avx(vecY dst, vecY src1, vecY src2) %{ 7445 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 7446 match(Set dst (SubVS src1 src2)); 7447 format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} 7448 ins_encode %{ 7449 int vector_len = 1; 7450 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7451 %} 7452 ins_pipe( pipe_slow ); 7453 %} 7454 7455 instruct vsub16S_reg_evex(vecY dst, vecY src1, vecY src2) %{ 7456 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 7457 match(Set dst (SubVS src1 src2)); 7458 format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} 7459 ins_encode %{ 7460 int vector_len = 1; 7461 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7462 %} 7463 ins_pipe( pipe_slow ); 7464 %} 7465 7466 instruct vsub16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ 7467 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 7468 match(Set dst (SubVS dst src2)); 7469 effect(TEMP src1); 7470 format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} 7471 ins_encode %{ 7472 int vector_len = 1; 7473 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7474 %} 7475 ins_pipe( pipe_slow ); 7476 %} 7477 7478 instruct vsub16S_mem_avx(vecY dst, vecY src, memory mem) %{ 7479 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 7480 match(Set dst (SubVS src (LoadVector mem))); 7481 format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %} 7482 ins_encode %{ 7483 int vector_len = 1; 7484 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7485 %} 7486 ins_pipe( pipe_slow ); 7487 %} 7488 7489 instruct vsub16S_mem_evex(vecY dst, vecY src, memory mem) %{ 7490 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 7491 match(Set dst (SubVS src (LoadVector mem))); 7492 format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %} 7493 ins_encode %{ 7494 int vector_len = 1; 7495 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7496 %} 7497 ins_pipe( pipe_slow ); 7498 %} 7499 7500 instruct vsub16S_mem_evex_special(vecY dst, vecY src, memory mem) %{ 7501 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 7502 match(Set dst (SubVS dst (LoadVector mem))); 7503 effect(TEMP src); 7504 format %{ "vpsubw $dst,$src,$mem\t! 
sub packed16S" %} 7505 ins_encode %{ 7506 int vector_len = 1; 7507 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7508 %} 7509 ins_pipe( pipe_slow ); 7510 %} 7511 7512 instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7513 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 7514 match(Set dst (SubVS src1 src2)); 7515 format %{ "vpsubw $dst,$src1,$src2\t! sub packed32S" %} 7516 ins_encode %{ 7517 int vector_len = 2; 7518 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7519 %} 7520 ins_pipe( pipe_slow ); 7521 %} 7522 7523 instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{ 7524 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 7525 match(Set dst (SubVS src (LoadVector mem))); 7526 format %{ "vpsubw $dst,$src,$mem\t! sub packed32S" %} 7527 ins_encode %{ 7528 int vector_len = 2; 7529 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7530 %} 7531 ins_pipe( pipe_slow ); 7532 %} 7533 7534 // Integers vector sub 7535 instruct vsub2I(vecD dst, vecD src) %{ 7536 predicate(n->as_Vector()->length() == 2); 7537 match(Set dst (SubVI dst src)); 7538 format %{ "psubd $dst,$src\t! sub packed2I" %} 7539 ins_encode %{ 7540 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 7541 %} 7542 ins_pipe( pipe_slow ); 7543 %} 7544 7545 instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{ 7546 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7547 match(Set dst (SubVI src1 src2)); 7548 format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %} 7549 ins_encode %{ 7550 int vector_len = 0; 7551 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7552 %} 7553 ins_pipe( pipe_slow ); 7554 %} 7555 7556 instruct vsub2I_mem(vecD dst, vecD src, memory mem) %{ 7557 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7558 match(Set dst (SubVI src (LoadVector mem))); 7559 format %{ "vpsubd $dst,$src,$mem\t! sub packed2I" %} 7560 ins_encode %{ 7561 int vector_len = 0; 7562 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7563 %} 7564 ins_pipe( pipe_slow ); 7565 %} 7566 7567 instruct vsub4I(vecX dst, vecX src) %{ 7568 predicate(n->as_Vector()->length() == 4); 7569 match(Set dst (SubVI dst src)); 7570 format %{ "psubd $dst,$src\t! sub packed4I" %} 7571 ins_encode %{ 7572 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 7573 %} 7574 ins_pipe( pipe_slow ); 7575 %} 7576 7577 instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{ 7578 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7579 match(Set dst (SubVI src1 src2)); 7580 format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %} 7581 ins_encode %{ 7582 int vector_len = 0; 7583 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7584 %} 7585 ins_pipe( pipe_slow ); 7586 %} 7587 7588 instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{ 7589 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7590 match(Set dst (SubVI src (LoadVector mem))); 7591 format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %} 7592 ins_encode %{ 7593 int vector_len = 0; 7594 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7595 %} 7596 ins_pipe( pipe_slow ); 7597 %} 7598 7599 instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{ 7600 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 7601 match(Set dst (SubVI src1 src2)); 7602 format %{ "vpsubd $dst,$src1,$src2\t! 
sub packed8I" %} 7603 ins_encode %{ 7604 int vector_len = 1; 7605 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7606 %} 7607 ins_pipe( pipe_slow ); 7608 %} 7609 7610 instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{ 7611 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 7612 match(Set dst (SubVI src (LoadVector mem))); 7613 format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %} 7614 ins_encode %{ 7615 int vector_len = 1; 7616 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7617 %} 7618 ins_pipe( pipe_slow ); 7619 %} 7620 7621 instruct vsub16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7622 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7623 match(Set dst (SubVI src1 src2)); 7624 format %{ "vpsubd $dst,$src1,$src2\t! sub packed16I" %} 7625 ins_encode %{ 7626 int vector_len = 2; 7627 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7628 %} 7629 ins_pipe( pipe_slow ); 7630 %} 7631 7632 instruct vsub16I_mem(vecZ dst, vecZ src, memory mem) %{ 7633 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7634 match(Set dst (SubVI src (LoadVector mem))); 7635 format %{ "vpsubd $dst,$src,$mem\t! sub packed16I" %} 7636 ins_encode %{ 7637 int vector_len = 2; 7638 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7639 %} 7640 ins_pipe( pipe_slow ); 7641 %} 7642 7643 // Longs vector sub 7644 instruct vsub2L(vecX dst, vecX src) %{ 7645 predicate(n->as_Vector()->length() == 2); 7646 match(Set dst (SubVL dst src)); 7647 format %{ "psubq $dst,$src\t! sub packed2L" %} 7648 ins_encode %{ 7649 __ psubq($dst$$XMMRegister, $src$$XMMRegister); 7650 %} 7651 ins_pipe( pipe_slow ); 7652 %} 7653 7654 instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{ 7655 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7656 match(Set dst (SubVL src1 src2)); 7657 format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %} 7658 ins_encode %{ 7659 int vector_len = 0; 7660 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7661 %} 7662 ins_pipe( pipe_slow ); 7663 %} 7664 7665 instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{ 7666 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7667 match(Set dst (SubVL src (LoadVector mem))); 7668 format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %} 7669 ins_encode %{ 7670 int vector_len = 0; 7671 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7672 %} 7673 ins_pipe( pipe_slow ); 7674 %} 7675 7676 instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{ 7677 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 7678 match(Set dst (SubVL src1 src2)); 7679 format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %} 7680 ins_encode %{ 7681 int vector_len = 1; 7682 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7683 %} 7684 ins_pipe( pipe_slow ); 7685 %} 7686 7687 instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{ 7688 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 7689 match(Set dst (SubVL src (LoadVector mem))); 7690 format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %} 7691 ins_encode %{ 7692 int vector_len = 1; 7693 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7694 %} 7695 ins_pipe( pipe_slow ); 7696 %} 7697 7698 instruct vsub8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7699 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 7700 match(Set dst (SubVL src1 src2)); 7701 format %{ "vpsubq $dst,$src1,$src2\t! 
sub packed8L" %} 7702 ins_encode %{ 7703 int vector_len = 2; 7704 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7705 %} 7706 ins_pipe( pipe_slow ); 7707 %} 7708 7709 instruct vsub8L_mem(vecZ dst, vecZ src, memory mem) %{ 7710 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 7711 match(Set dst (SubVL src (LoadVector mem))); 7712 format %{ "vpsubq $dst,$src,$mem\t! sub packed8L" %} 7713 ins_encode %{ 7714 int vector_len = 2; 7715 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7716 %} 7717 ins_pipe( pipe_slow ); 7718 %} 7719 7720 // Floats vector sub 7721 instruct vsub2F(vecD dst, vecD src) %{ 7722 predicate(n->as_Vector()->length() == 2); 7723 match(Set dst (SubVF dst src)); 7724 format %{ "subps $dst,$src\t! sub packed2F" %} 7725 ins_encode %{ 7726 __ subps($dst$$XMMRegister, $src$$XMMRegister); 7727 %} 7728 ins_pipe( pipe_slow ); 7729 %} 7730 7731 instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{ 7732 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7733 match(Set dst (SubVF src1 src2)); 7734 format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %} 7735 ins_encode %{ 7736 int vector_len = 0; 7737 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7738 %} 7739 ins_pipe( pipe_slow ); 7740 %} 7741 7742 instruct vsub2F_mem(vecD dst, vecD src, memory mem) %{ 7743 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7744 match(Set dst (SubVF src (LoadVector mem))); 7745 format %{ "vsubps $dst,$src,$mem\t! sub packed2F" %} 7746 ins_encode %{ 7747 int vector_len = 0; 7748 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7749 %} 7750 ins_pipe( pipe_slow ); 7751 %} 7752 7753 instruct vsub4F(vecX dst, vecX src) %{ 7754 predicate(n->as_Vector()->length() == 4); 7755 match(Set dst (SubVF dst src)); 7756 format %{ "subps $dst,$src\t! sub packed4F" %} 7757 ins_encode %{ 7758 __ subps($dst$$XMMRegister, $src$$XMMRegister); 7759 %} 7760 ins_pipe( pipe_slow ); 7761 %} 7762 7763 instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{ 7764 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7765 match(Set dst (SubVF src1 src2)); 7766 format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %} 7767 ins_encode %{ 7768 int vector_len = 0; 7769 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7770 %} 7771 ins_pipe( pipe_slow ); 7772 %} 7773 7774 instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{ 7775 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7776 match(Set dst (SubVF src (LoadVector mem))); 7777 format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %} 7778 ins_encode %{ 7779 int vector_len = 0; 7780 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7781 %} 7782 ins_pipe( pipe_slow ); 7783 %} 7784 7785 instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{ 7786 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7787 match(Set dst (SubVF src1 src2)); 7788 format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %} 7789 ins_encode %{ 7790 int vector_len = 1; 7791 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7792 %} 7793 ins_pipe( pipe_slow ); 7794 %} 7795 7796 instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{ 7797 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7798 match(Set dst (SubVF src (LoadVector mem))); 7799 format %{ "vsubps $dst,$src,$mem\t! 
sub packed8F" %} 7800 ins_encode %{ 7801 int vector_len = 1; 7802 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7803 %} 7804 ins_pipe( pipe_slow ); 7805 %} 7806 7807 instruct vsub16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7808 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7809 match(Set dst (SubVF src1 src2)); 7810 format %{ "vsubps $dst,$src1,$src2\t! sub packed16F" %} 7811 ins_encode %{ 7812 int vector_len = 2; 7813 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7814 %} 7815 ins_pipe( pipe_slow ); 7816 %} 7817 7818 instruct vsub16F_mem(vecZ dst, vecZ src, memory mem) %{ 7819 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7820 match(Set dst (SubVF src (LoadVector mem))); 7821 format %{ "vsubps $dst,$src,$mem\t! sub packed16F" %} 7822 ins_encode %{ 7823 int vector_len = 2; 7824 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7825 %} 7826 ins_pipe( pipe_slow ); 7827 %} 7828 7829 // Doubles vector sub 7830 instruct vsub2D(vecX dst, vecX src) %{ 7831 predicate(n->as_Vector()->length() == 2); 7832 match(Set dst (SubVD dst src)); 7833 format %{ "subpd $dst,$src\t! sub packed2D" %} 7834 ins_encode %{ 7835 __ subpd($dst$$XMMRegister, $src$$XMMRegister); 7836 %} 7837 ins_pipe( pipe_slow ); 7838 %} 7839 7840 instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{ 7841 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7842 match(Set dst (SubVD src1 src2)); 7843 format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %} 7844 ins_encode %{ 7845 int vector_len = 0; 7846 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7847 %} 7848 ins_pipe( pipe_slow ); 7849 %} 7850 7851 instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{ 7852 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7853 match(Set dst (SubVD src (LoadVector mem))); 7854 format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %} 7855 ins_encode %{ 7856 int vector_len = 0; 7857 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7858 %} 7859 ins_pipe( pipe_slow ); 7860 %} 7861 7862 instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{ 7863 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7864 match(Set dst (SubVD src1 src2)); 7865 format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %} 7866 ins_encode %{ 7867 int vector_len = 1; 7868 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7869 %} 7870 ins_pipe( pipe_slow ); 7871 %} 7872 7873 instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{ 7874 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7875 match(Set dst (SubVD src (LoadVector mem))); 7876 format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %} 7877 ins_encode %{ 7878 int vector_len = 1; 7879 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7880 %} 7881 ins_pipe( pipe_slow ); 7882 %} 7883 7884 instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7885 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 7886 match(Set dst (SubVD src1 src2)); 7887 format %{ "vsubpd $dst,$src1,$src2\t! sub packed8D" %} 7888 ins_encode %{ 7889 int vector_len = 2; 7890 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7891 %} 7892 ins_pipe( pipe_slow ); 7893 %} 7894 7895 instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{ 7896 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 7897 match(Set dst (SubVD src (LoadVector mem))); 7898 format %{ "vsubpd $dst,$src,$mem\t! 
sub packed8D" %} 7899 ins_encode %{ 7900 int vector_len = 2; 7901 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7902 %} 7903 ins_pipe( pipe_slow ); 7904 %} 7905 7906 // --------------------------------- MUL -------------------------------------- 7907 7908 // Shorts/Chars vector mul 7909 instruct vmul2S(vecS dst, vecS src) %{ 7910 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 7911 match(Set dst (MulVS dst src)); 7912 format %{ "pmullw $dst,$src\t! mul packed2S" %} 7913 ins_encode %{ 7914 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 7915 %} 7916 ins_pipe( pipe_slow ); 7917 %} 7918 7919 instruct vmul2S_reg_avx(vecS dst, vecS src1, vecS src2) %{ 7920 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 7921 match(Set dst (MulVS src1 src2)); 7922 format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} 7923 ins_encode %{ 7924 int vector_len = 0; 7925 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7926 %} 7927 ins_pipe( pipe_slow ); 7928 %} 7929 7930 instruct vmul2S_reg_evex(vecS dst, vecS src1, vecS src2) %{ 7931 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 7932 match(Set dst (MulVS src1 src2)); 7933 format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} 7934 ins_encode %{ 7935 int vector_len = 0; 7936 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7937 %} 7938 ins_pipe( pipe_slow ); 7939 %} 7940 7941 instruct vmul2S_evex_special(vecS dst, vecS src1, vecS src2) %{ 7942 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 7943 match(Set dst (MulVS dst src2)); 7944 effect(TEMP src1); 7945 format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} 7946 ins_encode %{ 7947 int vector_len = 0; 7948 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7949 %} 7950 ins_pipe( pipe_slow ); 7951 %} 7952 7953 instruct vmul2S_mem_avx(vecS dst, vecS src, memory mem) %{ 7954 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 7955 match(Set dst (MulVS src (LoadVector mem))); 7956 format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} 7957 ins_encode %{ 7958 int vector_len = 0; 7959 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7960 %} 7961 ins_pipe( pipe_slow ); 7962 %} 7963 7964 instruct vmul2S_mem_evex(vecS dst, vecS src, memory mem) %{ 7965 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 7966 match(Set dst (MulVS src (LoadVector mem))); 7967 format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} 7968 ins_encode %{ 7969 int vector_len = 0; 7970 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7971 %} 7972 ins_pipe( pipe_slow ); 7973 %} 7974 7975 instruct vmul2S_mem_evex_special(vecS dst, vecS src, memory mem) %{ 7976 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 7977 match(Set dst (MulVS dst (LoadVector mem))); 7978 effect(TEMP src); 7979 format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} 7980 ins_encode %{ 7981 int vector_len = 0; 7982 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7983 %} 7984 ins_pipe( pipe_slow ); 7985 %} 7986 7987 instruct vmul4S(vecD dst, vecD src) %{ 7988 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 7989 match(Set dst (MulVS dst src)); 7990 format %{ "pmullw $dst,$src\t! 
mul packed4S" %} 7991 ins_encode %{ 7992 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 7993 %} 7994 ins_pipe( pipe_slow ); 7995 %} 7996 7997 instruct vmul4S_reg_avx(vecD dst, vecD src1, vecD src2) %{ 7998 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 7999 match(Set dst (MulVS src1 src2)); 8000 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 8001 ins_encode %{ 8002 int vector_len = 0; 8003 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8004 %} 8005 ins_pipe( pipe_slow ); 8006 %} 8007 8008 instruct vmul4S_reg_evex(vecD dst, vecD src1, vecD src2) %{ 8009 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 8010 match(Set dst (MulVS src1 src2)); 8011 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 8012 ins_encode %{ 8013 int vector_len = 0; 8014 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8015 %} 8016 ins_pipe( pipe_slow ); 8017 %} 8018 8019 instruct vmul4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ 8020 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 8021 match(Set dst (MulVS dst src2)); 8022 effect(TEMP src1); 8023 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 8024 ins_encode %{ 8025 int vector_len = 0; 8026 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8027 %} 8028 ins_pipe( pipe_slow ); 8029 %} 8030 8031 instruct vmul4S_mem_avx(vecD dst, vecD src, memory mem) %{ 8032 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 8033 match(Set dst (MulVS src (LoadVector mem))); 8034 format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} 8035 ins_encode %{ 8036 int vector_len = 0; 8037 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8038 %} 8039 ins_pipe( pipe_slow ); 8040 %} 8041 8042 instruct vmul4S_mem_evex(vecD dst, vecD src, memory mem) %{ 8043 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 8044 match(Set dst (MulVS src (LoadVector mem))); 8045 format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} 8046 ins_encode %{ 8047 int vector_len = 0; 8048 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8049 %} 8050 ins_pipe( pipe_slow ); 8051 %} 8052 8053 instruct vmul4S_mem_evex_special(vecD dst, vecD src, memory mem) %{ 8054 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 8055 match(Set dst (MulVS dst (LoadVector mem))); 8056 effect(TEMP src); 8057 format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} 8058 ins_encode %{ 8059 int vector_len = 0; 8060 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8061 %} 8062 ins_pipe( pipe_slow ); 8063 %} 8064 8065 instruct vmul8S(vecX dst, vecX src) %{ 8066 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 8067 match(Set dst (MulVS dst src)); 8068 format %{ "pmullw $dst,$src\t! mul packed8S" %} 8069 ins_encode %{ 8070 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 8071 %} 8072 ins_pipe( pipe_slow ); 8073 %} 8074 8075 instruct vmul8S_reg_avx(vecX dst, vecX src1, vecX src2) %{ 8076 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 8077 match(Set dst (MulVS src1 src2)); 8078 format %{ "vpmullw $dst,$src1,$src2\t! 
mul packed8S" %} 8079 ins_encode %{ 8080 int vector_len = 0; 8081 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8082 %} 8083 ins_pipe( pipe_slow ); 8084 %} 8085 8086 instruct vmul8S_reg_evex(vecX dst, vecX src1, vecX src2) %{ 8087 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 8088 match(Set dst (MulVS src1 src2)); 8089 format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} 8090 ins_encode %{ 8091 int vector_len = 0; 8092 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8093 %} 8094 ins_pipe( pipe_slow ); 8095 %} 8096 8097 instruct vmul8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ 8098 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 8099 match(Set dst (MulVS dst src2)); 8100 effect(TEMP src1); 8101 format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} 8102 ins_encode %{ 8103 int vector_len = 0; 8104 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8105 %} 8106 ins_pipe( pipe_slow ); 8107 %} 8108 8109 instruct vmul8S_mem_avx(vecX dst, vecX src, memory mem) %{ 8110 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 8111 match(Set dst (MulVS src (LoadVector mem))); 8112 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} 8113 ins_encode %{ 8114 int vector_len = 0; 8115 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8116 %} 8117 ins_pipe( pipe_slow ); 8118 %} 8119 8120 instruct vmul8S_mem_evex(vecX dst, vecX src, memory mem) %{ 8121 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 8122 match(Set dst (MulVS src (LoadVector mem))); 8123 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} 8124 ins_encode %{ 8125 int vector_len = 0; 8126 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8127 %} 8128 ins_pipe( pipe_slow ); 8129 %} 8130 8131 instruct vmul8S_mem_evex_special(vecX dst, vecX src, memory mem) %{ 8132 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 8133 match(Set dst (MulVS dst (LoadVector mem))); 8134 effect(TEMP src); 8135 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} 8136 ins_encode %{ 8137 int vector_len = 0; 8138 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8139 %} 8140 ins_pipe( pipe_slow ); 8141 %} 8142 8143 instruct vmul16S_reg_avx(vecY dst, vecY src1, vecY src2) %{ 8144 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 8145 match(Set dst (MulVS src1 src2)); 8146 format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} 8147 ins_encode %{ 8148 int vector_len = 1; 8149 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8150 %} 8151 ins_pipe( pipe_slow ); 8152 %} 8153 8154 instruct vmul16S_reg_evex(vecY dst, vecY src1, vecY src2) %{ 8155 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 8156 match(Set dst (MulVS src1 src2)); 8157 format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} 8158 ins_encode %{ 8159 int vector_len = 1; 8160 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8161 %} 8162 ins_pipe( pipe_slow ); 8163 %} 8164 8165 instruct vmul16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ 8166 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 8167 match(Set dst (MulVS dst src2)); 8168 effect(TEMP src1); 8169 format %{ "vpmullw $dst,$src1,$src2\t! 
mul packed16S" %} 8170 ins_encode %{ 8171 int vector_len = 1; 8172 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8173 %} 8174 ins_pipe( pipe_slow ); 8175 %} 8176 8177 instruct vmul16S_mem_avx(vecY dst, vecY src, memory mem) %{ 8178 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 8179 match(Set dst (MulVS src (LoadVector mem))); 8180 format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} 8181 ins_encode %{ 8182 int vector_len = 1; 8183 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8184 %} 8185 ins_pipe( pipe_slow ); 8186 %} 8187 8188 instruct vmul16S_mem_evex(vecY dst, vecY src, memory mem) %{ 8189 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 8190 match(Set dst (MulVS src (LoadVector mem))); 8191 format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} 8192 ins_encode %{ 8193 int vector_len = 1; 8194 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8195 %} 8196 ins_pipe( pipe_slow ); 8197 %} 8198 8199 instruct vmul16S_mem_evex_special(vecY dst, vecY src, memory mem) %{ 8200 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 8201 match(Set dst (MulVS dst (LoadVector mem))); 8202 effect(TEMP src); 8203 format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} 8204 ins_encode %{ 8205 int vector_len = 1; 8206 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8207 %} 8208 ins_pipe( pipe_slow ); 8209 %} 8210 8211 instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ 8212 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 8213 match(Set dst (MulVS src1 src2)); 8214 format %{ "vpmullw $dst,$src1,$src2\t! mul packed32S" %} 8215 ins_encode %{ 8216 int vector_len = 2; 8217 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8218 %} 8219 ins_pipe( pipe_slow ); 8220 %} 8221 8222 instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{ 8223 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 8224 match(Set dst (MulVS src (LoadVector mem))); 8225 format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %} 8226 ins_encode %{ 8227 int vector_len = 2; 8228 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8229 %} 8230 ins_pipe( pipe_slow ); 8231 %} 8232 8233 // Integers vector mul (sse4_1) 8234 instruct vmul2I(vecD dst, vecD src) %{ 8235 predicate(UseSSE > 3 && n->as_Vector()->length() == 2); 8236 match(Set dst (MulVI dst src)); 8237 format %{ "pmulld $dst,$src\t! mul packed2I" %} 8238 ins_encode %{ 8239 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 8240 %} 8241 ins_pipe( pipe_slow ); 8242 %} 8243 8244 instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{ 8245 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8246 match(Set dst (MulVI src1 src2)); 8247 format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %} 8248 ins_encode %{ 8249 int vector_len = 0; 8250 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8251 %} 8252 ins_pipe( pipe_slow ); 8253 %} 8254 8255 instruct vmul2I_mem(vecD dst, vecD src, memory mem) %{ 8256 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8257 match(Set dst (MulVI src (LoadVector mem))); 8258 format %{ "vpmulld $dst,$src,$mem\t! 
mul packed2I" %} 8259 ins_encode %{ 8260 int vector_len = 0; 8261 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8262 %} 8263 ins_pipe( pipe_slow ); 8264 %} 8265 8266 instruct vmul4I(vecX dst, vecX src) %{ 8267 predicate(UseSSE > 3 && n->as_Vector()->length() == 4); 8268 match(Set dst (MulVI dst src)); 8269 format %{ "pmulld $dst,$src\t! mul packed4I" %} 8270 ins_encode %{ 8271 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 8272 %} 8273 ins_pipe( pipe_slow ); 8274 %} 8275 8276 instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{ 8277 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8278 match(Set dst (MulVI src1 src2)); 8279 format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %} 8280 ins_encode %{ 8281 int vector_len = 0; 8282 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8283 %} 8284 ins_pipe( pipe_slow ); 8285 %} 8286 8287 instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{ 8288 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8289 match(Set dst (MulVI src (LoadVector mem))); 8290 format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %} 8291 ins_encode %{ 8292 int vector_len = 0; 8293 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8294 %} 8295 ins_pipe( pipe_slow ); 8296 %} 8297 8298 instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{ 8299 predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq()); 8300 match(Set dst (MulVL src1 src2)); 8301 format %{ "vpmullq $dst,$src1,$src2\t! mul packed2L" %} 8302 ins_encode %{ 8303 int vector_len = 0; 8304 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8305 %} 8306 ins_pipe( pipe_slow ); 8307 %} 8308 8309 instruct vmul2L_mem(vecX dst, vecX src, memory mem) %{ 8310 predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq()); 8311 match(Set dst (MulVL src (LoadVector mem))); 8312 format %{ "vpmullq $dst,$src,$mem\t! mul packed2L" %} 8313 ins_encode %{ 8314 int vector_len = 0; 8315 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8316 %} 8317 ins_pipe( pipe_slow ); 8318 %} 8319 8320 instruct vmul4L_reg(vecY dst, vecY src1, vecY src2) %{ 8321 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq()); 8322 match(Set dst (MulVL src1 src2)); 8323 format %{ "vpmullq $dst,$src1,$src2\t! mul packed4L" %} 8324 ins_encode %{ 8325 int vector_len = 1; 8326 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8327 %} 8328 ins_pipe( pipe_slow ); 8329 %} 8330 8331 instruct vmul4L_mem(vecY dst, vecY src, memory mem) %{ 8332 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq()); 8333 match(Set dst (MulVL src (LoadVector mem))); 8334 format %{ "vpmullq $dst,$src,$mem\t! mul packed4L" %} 8335 ins_encode %{ 8336 int vector_len = 1; 8337 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8338 %} 8339 ins_pipe( pipe_slow ); 8340 %} 8341 8342 instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ 8343 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq()); 8344 match(Set dst (MulVL src1 src2)); 8345 format %{ "vpmullq $dst,$src1,$src2\t! 
mul packed8L" %} 8346 ins_encode %{ 8347 int vector_len = 2; 8348 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8349 %} 8350 ins_pipe( pipe_slow ); 8351 %} 8352 8353 instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{ 8354 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq()); 8355 match(Set dst (MulVL src (LoadVector mem))); 8356 format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %} 8357 ins_encode %{ 8358 int vector_len = 2; 8359 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8360 %} 8361 ins_pipe( pipe_slow ); 8362 %} 8363 8364 instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{ 8365 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 8366 match(Set dst (MulVI src1 src2)); 8367 format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %} 8368 ins_encode %{ 8369 int vector_len = 1; 8370 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8371 %} 8372 ins_pipe( pipe_slow ); 8373 %} 8374 8375 instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{ 8376 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 8377 match(Set dst (MulVI src (LoadVector mem))); 8378 format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %} 8379 ins_encode %{ 8380 int vector_len = 1; 8381 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8382 %} 8383 ins_pipe( pipe_slow ); 8384 %} 8385 8386 instruct vmul16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ 8387 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8388 match(Set dst (MulVI src1 src2)); 8389 format %{ "vpmulld $dst,$src1,$src2\t! mul packed16I" %} 8390 ins_encode %{ 8391 int vector_len = 2; 8392 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8393 %} 8394 ins_pipe( pipe_slow ); 8395 %} 8396 8397 instruct vmul16I_mem(vecZ dst, vecZ src, memory mem) %{ 8398 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8399 match(Set dst (MulVI src (LoadVector mem))); 8400 format %{ "vpmulld $dst,$src,$mem\t! mul packed16I" %} 8401 ins_encode %{ 8402 int vector_len = 2; 8403 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8404 %} 8405 ins_pipe( pipe_slow ); 8406 %} 8407 8408 // Floats vector mul 8409 instruct vmul2F(vecD dst, vecD src) %{ 8410 predicate(n->as_Vector()->length() == 2); 8411 match(Set dst (MulVF dst src)); 8412 format %{ "mulps $dst,$src\t! mul packed2F" %} 8413 ins_encode %{ 8414 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 8415 %} 8416 ins_pipe( pipe_slow ); 8417 %} 8418 8419 instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{ 8420 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8421 match(Set dst (MulVF src1 src2)); 8422 format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %} 8423 ins_encode %{ 8424 int vector_len = 0; 8425 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8426 %} 8427 ins_pipe( pipe_slow ); 8428 %} 8429 8430 instruct vmul2F_mem(vecD dst, vecD src, memory mem) %{ 8431 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8432 match(Set dst (MulVF src (LoadVector mem))); 8433 format %{ "vmulps $dst,$src,$mem\t! mul packed2F" %} 8434 ins_encode %{ 8435 int vector_len = 0; 8436 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8437 %} 8438 ins_pipe( pipe_slow ); 8439 %} 8440 8441 instruct vmul4F(vecX dst, vecX src) %{ 8442 predicate(n->as_Vector()->length() == 4); 8443 match(Set dst (MulVF dst src)); 8444 format %{ "mulps $dst,$src\t! 
mul packed4F" %} 8445 ins_encode %{ 8446 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 8447 %} 8448 ins_pipe( pipe_slow ); 8449 %} 8450 8451 instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{ 8452 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8453 match(Set dst (MulVF src1 src2)); 8454 format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %} 8455 ins_encode %{ 8456 int vector_len = 0; 8457 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8458 %} 8459 ins_pipe( pipe_slow ); 8460 %} 8461 8462 instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{ 8463 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8464 match(Set dst (MulVF src (LoadVector mem))); 8465 format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %} 8466 ins_encode %{ 8467 int vector_len = 0; 8468 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8469 %} 8470 ins_pipe( pipe_slow ); 8471 %} 8472 8473 instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{ 8474 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8475 match(Set dst (MulVF src1 src2)); 8476 format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %} 8477 ins_encode %{ 8478 int vector_len = 1; 8479 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8480 %} 8481 ins_pipe( pipe_slow ); 8482 %} 8483 8484 instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{ 8485 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8486 match(Set dst (MulVF src (LoadVector mem))); 8487 format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %} 8488 ins_encode %{ 8489 int vector_len = 1; 8490 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8491 %} 8492 ins_pipe( pipe_slow ); 8493 %} 8494 8495 instruct vmul16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ 8496 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8497 match(Set dst (MulVF src1 src2)); 8498 format %{ "vmulps $dst,$src1,$src2\t! mul packed16F" %} 8499 ins_encode %{ 8500 int vector_len = 2; 8501 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8502 %} 8503 ins_pipe( pipe_slow ); 8504 %} 8505 8506 instruct vmul16F_mem(vecZ dst, vecZ src, memory mem) %{ 8507 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8508 match(Set dst (MulVF src (LoadVector mem))); 8509 format %{ "vmulps $dst,$src,$mem\t! mul packed16F" %} 8510 ins_encode %{ 8511 int vector_len = 2; 8512 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8513 %} 8514 ins_pipe( pipe_slow ); 8515 %} 8516 8517 // Doubles vector mul 8518 instruct vmul2D(vecX dst, vecX src) %{ 8519 predicate(n->as_Vector()->length() == 2); 8520 match(Set dst (MulVD dst src)); 8521 format %{ "mulpd $dst,$src\t! mul packed2D" %} 8522 ins_encode %{ 8523 __ mulpd($dst$$XMMRegister, $src$$XMMRegister); 8524 %} 8525 ins_pipe( pipe_slow ); 8526 %} 8527 8528 instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{ 8529 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8530 match(Set dst (MulVD src1 src2)); 8531 format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %} 8532 ins_encode %{ 8533 int vector_len = 0; 8534 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8535 %} 8536 ins_pipe( pipe_slow ); 8537 %} 8538 8539 instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{ 8540 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8541 match(Set dst (MulVD src (LoadVector mem))); 8542 format %{ "vmulpd $dst,$src,$mem\t! 
mul packed2D" %} 8543 ins_encode %{ 8544 int vector_len = 0; 8545 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8546 %} 8547 ins_pipe( pipe_slow ); 8548 %} 8549 8550 instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{ 8551 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8552 match(Set dst (MulVD src1 src2)); 8553 format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %} 8554 ins_encode %{ 8555 int vector_len = 1; 8556 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8557 %} 8558 ins_pipe( pipe_slow ); 8559 %} 8560 8561 instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{ 8562 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8563 match(Set dst (MulVD src (LoadVector mem))); 8564 format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %} 8565 ins_encode %{ 8566 int vector_len = 1; 8567 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8568 %} 8569 ins_pipe( pipe_slow ); 8570 %} 8571 8572 instruct vmul8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ 8573 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8574 match(Set dst (MulVD src1 src2)); 8575 format %{ "vmulpd $dst k0,$src1,$src2\t! mul packed8D" %} 8576 ins_encode %{ 8577 int vector_len = 2; 8578 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8579 %} 8580 ins_pipe( pipe_slow ); 8581 %} 8582 8583 instruct vmul8D_mem(vecZ dst, vecZ src, memory mem) %{ 8584 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8585 match(Set dst (MulVD src (LoadVector mem))); 8586 format %{ "vmulpd $dst k0,$src,$mem\t! mul packed8D" %} 8587 ins_encode %{ 8588 int vector_len = 2; 8589 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8590 %} 8591 ins_pipe( pipe_slow ); 8592 %} 8593 8594 instruct vcmov4D_reg(vecY dst, vecY src1, vecY src2, immI8 cop, cmpOp_vcmppd copnd) %{ 8595 predicate(UseAVX > 0 && UseAVX < 3 && n->as_Vector()->length() == 4); 8596 match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2))); 8597 effect(TEMP dst, USE src1, USE src2); 8598 format %{ "cmppd.$copnd $dst, $src1, $src2 ! vcmovevd, cond=$cop\n\t" 8599 "vpblendd $dst,$src1,$src2,$dst ! vcmovevd\n\t" 8600 %} 8601 ins_encode %{ 8602 int vector_len = 1; 8603 int cond = (Assembler::Condition)($copnd$$cmpcode); 8604 __ cmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len); 8605 __ vpblendd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len); 8606 %} 8607 ins_pipe( pipe_slow ); 8608 %} 8609 8610 // --------------------------------- DIV -------------------------------------- 8611 8612 // Floats vector div 8613 instruct vdiv2F(vecD dst, vecD src) %{ 8614 predicate(n->as_Vector()->length() == 2); 8615 match(Set dst (DivVF dst src)); 8616 format %{ "divps $dst,$src\t! div packed2F" %} 8617 ins_encode %{ 8618 __ divps($dst$$XMMRegister, $src$$XMMRegister); 8619 %} 8620 ins_pipe( pipe_slow ); 8621 %} 8622 8623 instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{ 8624 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8625 match(Set dst (DivVF src1 src2)); 8626 format %{ "vdivps $dst,$src1,$src2\t! 
div packed2F" %} 8627 ins_encode %{ 8628 int vector_len = 0; 8629 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8630 %} 8631 ins_pipe( pipe_slow ); 8632 %} 8633 8634 instruct vdiv2F_mem(vecD dst, vecD src, memory mem) %{ 8635 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8636 match(Set dst (DivVF src (LoadVector mem))); 8637 format %{ "vdivps $dst,$src,$mem\t! div packed2F" %} 8638 ins_encode %{ 8639 int vector_len = 0; 8640 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8641 %} 8642 ins_pipe( pipe_slow ); 8643 %} 8644 8645 instruct vdiv4F(vecX dst, vecX src) %{ 8646 predicate(n->as_Vector()->length() == 4); 8647 match(Set dst (DivVF dst src)); 8648 format %{ "divps $dst,$src\t! div packed4F" %} 8649 ins_encode %{ 8650 __ divps($dst$$XMMRegister, $src$$XMMRegister); 8651 %} 8652 ins_pipe( pipe_slow ); 8653 %} 8654 8655 instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{ 8656 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8657 match(Set dst (DivVF src1 src2)); 8658 format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %} 8659 ins_encode %{ 8660 int vector_len = 0; 8661 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8662 %} 8663 ins_pipe( pipe_slow ); 8664 %} 8665 8666 instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{ 8667 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8668 match(Set dst (DivVF src (LoadVector mem))); 8669 format %{ "vdivps $dst,$src,$mem\t! div packed4F" %} 8670 ins_encode %{ 8671 int vector_len = 0; 8672 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8673 %} 8674 ins_pipe( pipe_slow ); 8675 %} 8676 8677 instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{ 8678 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8679 match(Set dst (DivVF src1 src2)); 8680 format %{ "vdivps $dst,$src1,$src2\t! div packed8F" %} 8681 ins_encode %{ 8682 int vector_len = 1; 8683 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8684 %} 8685 ins_pipe( pipe_slow ); 8686 %} 8687 8688 instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{ 8689 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8690 match(Set dst (DivVF src (LoadVector mem))); 8691 format %{ "vdivps $dst,$src,$mem\t! div packed8F" %} 8692 ins_encode %{ 8693 int vector_len = 1; 8694 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8695 %} 8696 ins_pipe( pipe_slow ); 8697 %} 8698 8699 instruct vdiv16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ 8700 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 8701 match(Set dst (DivVF src1 src2)); 8702 format %{ "vdivps $dst,$src1,$src2\t! div packed16F" %} 8703 ins_encode %{ 8704 int vector_len = 2; 8705 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8706 %} 8707 ins_pipe( pipe_slow ); 8708 %} 8709 8710 instruct vdiv16F_mem(vecZ dst, vecZ src, memory mem) %{ 8711 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 8712 match(Set dst (DivVF src (LoadVector mem))); 8713 format %{ "vdivps $dst,$src,$mem\t! div packed16F" %} 8714 ins_encode %{ 8715 int vector_len = 2; 8716 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8717 %} 8718 ins_pipe( pipe_slow ); 8719 %} 8720 8721 // Doubles vector div 8722 instruct vdiv2D(vecX dst, vecX src) %{ 8723 predicate(n->as_Vector()->length() == 2); 8724 match(Set dst (DivVD dst src)); 8725 format %{ "divpd $dst,$src\t! 
div packed2D" %} 8726 ins_encode %{ 8727 __ divpd($dst$$XMMRegister, $src$$XMMRegister); 8728 %} 8729 ins_pipe( pipe_slow ); 8730 %} 8731 8732 instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{ 8733 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8734 match(Set dst (DivVD src1 src2)); 8735 format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %} 8736 ins_encode %{ 8737 int vector_len = 0; 8738 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8739 %} 8740 ins_pipe( pipe_slow ); 8741 %} 8742 8743 instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{ 8744 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8745 match(Set dst (DivVD src (LoadVector mem))); 8746 format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %} 8747 ins_encode %{ 8748 int vector_len = 0; 8749 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8750 %} 8751 ins_pipe( pipe_slow ); 8752 %} 8753 8754 instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{ 8755 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8756 match(Set dst (DivVD src1 src2)); 8757 format %{ "vdivpd $dst,$src1,$src2\t! div packed4D" %} 8758 ins_encode %{ 8759 int vector_len = 1; 8760 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8761 %} 8762 ins_pipe( pipe_slow ); 8763 %} 8764 8765 instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{ 8766 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8767 match(Set dst (DivVD src (LoadVector mem))); 8768 format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %} 8769 ins_encode %{ 8770 int vector_len = 1; 8771 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8772 %} 8773 ins_pipe( pipe_slow ); 8774 %} 8775 8776 instruct vdiv8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ 8777 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8778 match(Set dst (DivVD src1 src2)); 8779 format %{ "vdivpd $dst,$src1,$src2\t! div packed8D" %} 8780 ins_encode %{ 8781 int vector_len = 2; 8782 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8783 %} 8784 ins_pipe( pipe_slow ); 8785 %} 8786 8787 instruct vdiv8D_mem(vecZ dst, vecZ src, memory mem) %{ 8788 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8789 match(Set dst (DivVD src (LoadVector mem))); 8790 format %{ "vdivpd $dst,$src,$mem\t! div packed8D" %} 8791 ins_encode %{ 8792 int vector_len = 2; 8793 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8794 %} 8795 ins_pipe( pipe_slow ); 8796 %} 8797 8798 // ------------------------------ Shift --------------------------------------- 8799 8800 // Left and right shift count vectors are the same on x86 8801 // (only lowest bits of xmm reg are used for count). 8802 instruct vshiftcnt(vecS dst, rRegI cnt) %{ 8803 match(Set dst (LShiftCntV cnt)); 8804 match(Set dst (RShiftCntV cnt)); 8805 format %{ "movd $dst,$cnt\t! load shift count" %} 8806 ins_encode %{ 8807 __ movdl($dst$$XMMRegister, $cnt$$Register); 8808 %} 8809 ins_pipe( pipe_slow ); 8810 %} 8811 8812 // --------------------------------- Sqrt -------------------------------------- 8813 8814 // Floating point vector sqrt - double precision only 8815 instruct vsqrt2D_reg(vecX dst, vecX src) %{ 8816 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8817 match(Set dst (SqrtVD src)); 8818 format %{ "vsqrtpd $dst,$src\t! 
sqrt packed2D" %} 8819 ins_encode %{ 8820 int vector_len = 0; 8821 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8822 %} 8823 ins_pipe( pipe_slow ); 8824 %} 8825 8826 instruct vsqrt2D_mem(vecX dst, memory mem) %{ 8827 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8828 match(Set dst (SqrtVD (LoadVector mem))); 8829 format %{ "vsqrtpd $dst,$mem\t! sqrt packed2D" %} 8830 ins_encode %{ 8831 int vector_len = 0; 8832 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); 8833 %} 8834 ins_pipe( pipe_slow ); 8835 %} 8836 8837 instruct vsqrt4D_reg(vecY dst, vecY src) %{ 8838 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8839 match(Set dst (SqrtVD src)); 8840 format %{ "vsqrtpd $dst,$src\t! sqrt packed4D" %} 8841 ins_encode %{ 8842 int vector_len = 1; 8843 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8844 %} 8845 ins_pipe( pipe_slow ); 8846 %} 8847 8848 instruct vsqrt4D_mem(vecY dst, memory mem) %{ 8849 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8850 match(Set dst (SqrtVD (LoadVector mem))); 8851 format %{ "vsqrtpd $dst,$mem\t! sqrt packed4D" %} 8852 ins_encode %{ 8853 int vector_len = 1; 8854 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); 8855 %} 8856 ins_pipe( pipe_slow ); 8857 %} 8858 8859 instruct vsqrt8D_reg(vecZ dst, vecZ src) %{ 8860 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8861 match(Set dst (SqrtVD src)); 8862 format %{ "vsqrtpd $dst,$src\t! sqrt packed8D" %} 8863 ins_encode %{ 8864 int vector_len = 2; 8865 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8866 %} 8867 ins_pipe( pipe_slow ); 8868 %} 8869 8870 instruct vsqrt8D_mem(vecZ dst, memory mem) %{ 8871 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8872 match(Set dst (SqrtVD (LoadVector mem))); 8873 format %{ "vsqrtpd $dst,$mem\t! sqrt packed8D" %} 8874 ins_encode %{ 8875 int vector_len = 2; 8876 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); 8877 %} 8878 ins_pipe( pipe_slow ); 8879 %} 8880 8881 // ------------------------------ LeftShift ----------------------------------- 8882 8883 // Shorts/Chars vector left shift 8884 instruct vsll2S(vecS dst, vecS shift) %{ 8885 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8886 match(Set dst (LShiftVS dst shift)); 8887 format %{ "psllw $dst,$shift\t! left shift packed2S" %} 8888 ins_encode %{ 8889 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 8890 %} 8891 ins_pipe( pipe_slow ); 8892 %} 8893 8894 instruct vsll2S_imm(vecS dst, immI8 shift) %{ 8895 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8896 match(Set dst (LShiftVS dst shift)); 8897 format %{ "psllw $dst,$shift\t! left shift packed2S" %} 8898 ins_encode %{ 8899 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 8900 %} 8901 ins_pipe( pipe_slow ); 8902 %} 8903 8904 instruct vsll2S_reg_avx(vecS dst, vecS src, vecS shift) %{ 8905 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 8906 match(Set dst (LShiftVS src shift)); 8907 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 8908 ins_encode %{ 8909 int vector_len = 0; 8910 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8911 %} 8912 ins_pipe( pipe_slow ); 8913 %} 8914 8915 instruct vsll2S_reg_evex(vecS dst, vecS src, vecS shift) %{ 8916 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 8917 match(Set dst (LShiftVS src shift)); 8918 format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed2S" %} 8919 ins_encode %{ 8920 int vector_len = 0; 8921 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8922 %} 8923 ins_pipe( pipe_slow ); 8924 %} 8925 8926 instruct vsll2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{ 8927 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 8928 match(Set dst (LShiftVS dst shift)); 8929 effect(TEMP src); 8930 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 8931 ins_encode %{ 8932 int vector_len = 0; 8933 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8934 %} 8935 ins_pipe( pipe_slow ); 8936 %} 8937 8938 instruct vsll2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{ 8939 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 8940 match(Set dst (LShiftVS src shift)); 8941 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 8942 ins_encode %{ 8943 int vector_len = 0; 8944 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8945 %} 8946 ins_pipe( pipe_slow ); 8947 %} 8948 8949 instruct vsll2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{ 8950 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 8951 match(Set dst (LShiftVS src shift)); 8952 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 8953 ins_encode %{ 8954 int vector_len = 0; 8955 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8956 %} 8957 ins_pipe( pipe_slow ); 8958 %} 8959 8960 instruct vsll2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{ 8961 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 8962 match(Set dst (LShiftVS dst shift)); 8963 effect(TEMP src); 8964 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 8965 ins_encode %{ 8966 int vector_len = 0; 8967 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8968 %} 8969 ins_pipe( pipe_slow ); 8970 %} 8971 8972 instruct vsll4S(vecD dst, vecS shift) %{ 8973 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 8974 match(Set dst (LShiftVS dst shift)); 8975 format %{ "psllw $dst,$shift\t! left shift packed4S" %} 8976 ins_encode %{ 8977 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 8978 %} 8979 ins_pipe( pipe_slow ); 8980 %} 8981 8982 instruct vsll4S_imm(vecD dst, immI8 shift) %{ 8983 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 8984 match(Set dst (LShiftVS dst shift)); 8985 format %{ "psllw $dst,$shift\t! left shift packed4S" %} 8986 ins_encode %{ 8987 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 8988 %} 8989 ins_pipe( pipe_slow ); 8990 %} 8991 8992 instruct vsll4S_reg_avx(vecD dst, vecD src, vecS shift) %{ 8993 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 8994 match(Set dst (LShiftVS src shift)); 8995 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 8996 ins_encode %{ 8997 int vector_len = 0; 8998 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8999 %} 9000 ins_pipe( pipe_slow ); 9001 %} 9002 9003 instruct vsll4S_reg_evex(vecD dst, vecD src, vecS shift) %{ 9004 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 9005 match(Set dst (LShiftVS src shift)); 9006 format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed4S" %} 9007 ins_encode %{ 9008 int vector_len = 0; 9009 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9010 %} 9011 ins_pipe( pipe_slow ); 9012 %} 9013 9014 instruct vsll4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{ 9015 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 9016 match(Set dst (LShiftVS dst shift)); 9017 effect(TEMP src); 9018 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 9019 ins_encode %{ 9020 int vector_len = 0; 9021 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9022 %} 9023 ins_pipe( pipe_slow ); 9024 %} 9025 9026 instruct vsll4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{ 9027 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 9028 match(Set dst (LShiftVS src shift)); 9029 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 9030 ins_encode %{ 9031 int vector_len = 0; 9032 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9033 %} 9034 ins_pipe( pipe_slow ); 9035 %} 9036 9037 instruct vsll4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{ 9038 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 9039 match(Set dst (LShiftVS src shift)); 9040 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 9041 ins_encode %{ 9042 int vector_len = 0; 9043 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9044 %} 9045 ins_pipe( pipe_slow ); 9046 %} 9047 9048 instruct vsll4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{ 9049 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 9050 match(Set dst (LShiftVS dst shift)); 9051 effect(TEMP src); 9052 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 9053 ins_encode %{ 9054 int vector_len = 0; 9055 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9056 %} 9057 ins_pipe( pipe_slow ); 9058 %} 9059 9060 instruct vsll8S(vecX dst, vecS shift) %{ 9061 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 9062 match(Set dst (LShiftVS dst shift)); 9063 format %{ "psllw $dst,$shift\t! left shift packed8S" %} 9064 ins_encode %{ 9065 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 9066 %} 9067 ins_pipe( pipe_slow ); 9068 %} 9069 9070 instruct vsll8S_imm(vecX dst, immI8 shift) %{ 9071 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 9072 match(Set dst (LShiftVS dst shift)); 9073 format %{ "psllw $dst,$shift\t! left shift packed8S" %} 9074 ins_encode %{ 9075 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 9076 %} 9077 ins_pipe( pipe_slow ); 9078 %} 9079 9080 instruct vsll8S_reg_avx(vecX dst, vecX src, vecS shift) %{ 9081 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 9082 match(Set dst (LShiftVS src shift)); 9083 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 9084 ins_encode %{ 9085 int vector_len = 0; 9086 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9087 %} 9088 ins_pipe( pipe_slow ); 9089 %} 9090 9091 instruct vsll8S_reg_evex(vecX dst, vecX src, vecS shift) %{ 9092 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 9093 match(Set dst (LShiftVS src shift)); 9094 format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed8S" %} 9095 ins_encode %{ 9096 int vector_len = 0; 9097 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9098 %} 9099 ins_pipe( pipe_slow ); 9100 %} 9101 9102 instruct vsll8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{ 9103 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 9104 match(Set dst (LShiftVS dst shift)); 9105 effect(TEMP src); 9106 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 9107 ins_encode %{ 9108 int vector_len = 0; 9109 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9110 %} 9111 ins_pipe( pipe_slow ); 9112 %} 9113 9114 instruct vsll8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{ 9115 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 9116 match(Set dst (LShiftVS src shift)); 9117 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 9118 ins_encode %{ 9119 int vector_len = 0; 9120 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9121 %} 9122 ins_pipe( pipe_slow ); 9123 %} 9124 9125 instruct vsll8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{ 9126 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 9127 match(Set dst (LShiftVS src shift)); 9128 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 9129 ins_encode %{ 9130 int vector_len = 0; 9131 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9132 %} 9133 ins_pipe( pipe_slow ); 9134 %} 9135 9136 instruct vsll8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{ 9137 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 9138 match(Set dst (LShiftVS dst shift)); 9139 effect(TEMP src); 9140 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 9141 ins_encode %{ 9142 int vector_len = 0; 9143 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9144 %} 9145 ins_pipe( pipe_slow ); 9146 %} 9147 9148 instruct vsll16S_reg_avx(vecY dst, vecY src, vecS shift) %{ 9149 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 9150 match(Set dst (LShiftVS src shift)); 9151 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 9152 ins_encode %{ 9153 int vector_len = 1; 9154 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9155 %} 9156 ins_pipe( pipe_slow ); 9157 %} 9158 9159 instruct vsll16S_reg_evex(vecY dst, vecY src, vecS shift) %{ 9160 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 9161 match(Set dst (LShiftVS src shift)); 9162 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 9163 ins_encode %{ 9164 int vector_len = 1; 9165 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9166 %} 9167 ins_pipe( pipe_slow ); 9168 %} 9169 9170 instruct vsll16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{ 9171 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 9172 match(Set dst (LShiftVS dst shift)); 9173 effect(TEMP src); 9174 format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed16S" %} 9175 ins_encode %{ 9176 int vector_len = 1; 9177 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9178 %} 9179 ins_pipe( pipe_slow ); 9180 %} 9181 9182 instruct vsll16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{ 9183 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 9184 match(Set dst (LShiftVS src shift)); 9185 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 9186 ins_encode %{ 9187 int vector_len = 1; 9188 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9189 %} 9190 ins_pipe( pipe_slow ); 9191 %} 9192 9193 instruct vsll16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{ 9194 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 9195 match(Set dst (LShiftVS src shift)); 9196 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 9197 ins_encode %{ 9198 int vector_len = 1; 9199 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9200 %} 9201 ins_pipe( pipe_slow ); 9202 %} 9203 9204 instruct vsll16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{ 9205 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 9206 match(Set dst (LShiftVS dst shift)); 9207 effect(TEMP src); 9208 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 9209 ins_encode %{ 9210 int vector_len = 1; 9211 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9212 %} 9213 ins_pipe( pipe_slow ); 9214 %} 9215 9216 instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{ 9217 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 9218 match(Set dst (LShiftVS src shift)); 9219 format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %} 9220 ins_encode %{ 9221 int vector_len = 2; 9222 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9223 %} 9224 ins_pipe( pipe_slow ); 9225 %} 9226 9227 instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 9228 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 9229 match(Set dst (LShiftVS src shift)); 9230 format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %} 9231 ins_encode %{ 9232 int vector_len = 2; 9233 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9234 %} 9235 ins_pipe( pipe_slow ); 9236 %} 9237 9238 // Integers vector left shift 9239 instruct vsll2I(vecD dst, vecS shift) %{ 9240 predicate(n->as_Vector()->length() == 2); 9241 match(Set dst (LShiftVI dst shift)); 9242 format %{ "pslld $dst,$shift\t! left shift packed2I" %} 9243 ins_encode %{ 9244 __ pslld($dst$$XMMRegister, $shift$$XMMRegister); 9245 %} 9246 ins_pipe( pipe_slow ); 9247 %} 9248 9249 instruct vsll2I_imm(vecD dst, immI8 shift) %{ 9250 predicate(n->as_Vector()->length() == 2); 9251 match(Set dst (LShiftVI dst shift)); 9252 format %{ "pslld $dst,$shift\t! left shift packed2I" %} 9253 ins_encode %{ 9254 __ pslld($dst$$XMMRegister, (int)$shift$$constant); 9255 %} 9256 ins_pipe( pipe_slow ); 9257 %} 9258 9259 instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{ 9260 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9261 match(Set dst (LShiftVI src shift)); 9262 format %{ "vpslld $dst,$src,$shift\t! 
left shift packed2I" %} 9263 ins_encode %{ 9264 int vector_len = 0; 9265 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9266 %} 9267 ins_pipe( pipe_slow ); 9268 %} 9269 9270 instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 9271 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9272 match(Set dst (LShiftVI src shift)); 9273 format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %} 9274 ins_encode %{ 9275 int vector_len = 0; 9276 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9277 %} 9278 ins_pipe( pipe_slow ); 9279 %} 9280 9281 instruct vsll4I(vecX dst, vecS shift) %{ 9282 predicate(n->as_Vector()->length() == 4); 9283 match(Set dst (LShiftVI dst shift)); 9284 format %{ "pslld $dst,$shift\t! left shift packed4I" %} 9285 ins_encode %{ 9286 __ pslld($dst$$XMMRegister, $shift$$XMMRegister); 9287 %} 9288 ins_pipe( pipe_slow ); 9289 %} 9290 9291 instruct vsll4I_imm(vecX dst, immI8 shift) %{ 9292 predicate(n->as_Vector()->length() == 4); 9293 match(Set dst (LShiftVI dst shift)); 9294 format %{ "pslld $dst,$shift\t! left shift packed4I" %} 9295 ins_encode %{ 9296 __ pslld($dst$$XMMRegister, (int)$shift$$constant); 9297 %} 9298 ins_pipe( pipe_slow ); 9299 %} 9300 9301 instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{ 9302 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 9303 match(Set dst (LShiftVI src shift)); 9304 format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %} 9305 ins_encode %{ 9306 int vector_len = 0; 9307 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9308 %} 9309 ins_pipe( pipe_slow ); 9310 %} 9311 9312 instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 9313 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 9314 match(Set dst (LShiftVI src shift)); 9315 format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %} 9316 ins_encode %{ 9317 int vector_len = 0; 9318 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9319 %} 9320 ins_pipe( pipe_slow ); 9321 %} 9322 9323 instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{ 9324 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 9325 match(Set dst (LShiftVI src shift)); 9326 format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} 9327 ins_encode %{ 9328 int vector_len = 1; 9329 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9330 %} 9331 ins_pipe( pipe_slow ); 9332 %} 9333 9334 instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 9335 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 9336 match(Set dst (LShiftVI src shift)); 9337 format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} 9338 ins_encode %{ 9339 int vector_len = 1; 9340 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9341 %} 9342 ins_pipe( pipe_slow ); 9343 %} 9344 9345 instruct vsll16I_reg(vecZ dst, vecZ src, vecS shift) %{ 9346 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 9347 match(Set dst (LShiftVI src shift)); 9348 format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %} 9349 ins_encode %{ 9350 int vector_len = 2; 9351 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9352 %} 9353 ins_pipe( pipe_slow ); 9354 %} 9355 9356 instruct vsll16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 9357 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 9358 match(Set dst (LShiftVI src shift)); 9359 format %{ "vpslld $dst,$src,$shift\t! 
left shift packed16I" %} 9360 ins_encode %{ 9361 int vector_len = 2; 9362 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9363 %} 9364 ins_pipe( pipe_slow ); 9365 %} 9366 9367 // Longs vector left shift 9368 instruct vsll2L(vecX dst, vecS shift) %{ 9369 predicate(n->as_Vector()->length() == 2); 9370 match(Set dst (LShiftVL dst shift)); 9371 format %{ "psllq $dst,$shift\t! left shift packed2L" %} 9372 ins_encode %{ 9373 __ psllq($dst$$XMMRegister, $shift$$XMMRegister); 9374 %} 9375 ins_pipe( pipe_slow ); 9376 %} 9377 9378 instruct vsll2L_imm(vecX dst, immI8 shift) %{ 9379 predicate(n->as_Vector()->length() == 2); 9380 match(Set dst (LShiftVL dst shift)); 9381 format %{ "psllq $dst,$shift\t! left shift packed2L" %} 9382 ins_encode %{ 9383 __ psllq($dst$$XMMRegister, (int)$shift$$constant); 9384 %} 9385 ins_pipe( pipe_slow ); 9386 %} 9387 9388 instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{ 9389 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9390 match(Set dst (LShiftVL src shift)); 9391 format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %} 9392 ins_encode %{ 9393 int vector_len = 0; 9394 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9395 %} 9396 ins_pipe( pipe_slow ); 9397 %} 9398 9399 instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ 9400 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9401 match(Set dst (LShiftVL src shift)); 9402 format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %} 9403 ins_encode %{ 9404 int vector_len = 0; 9405 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9406 %} 9407 ins_pipe( pipe_slow ); 9408 %} 9409 9410 instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{ 9411 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 9412 match(Set dst (LShiftVL src shift)); 9413 format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %} 9414 ins_encode %{ 9415 int vector_len = 1; 9416 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9417 %} 9418 ins_pipe( pipe_slow ); 9419 %} 9420 9421 instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{ 9422 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 9423 match(Set dst (LShiftVL src shift)); 9424 format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %} 9425 ins_encode %{ 9426 int vector_len = 1; 9427 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9428 %} 9429 ins_pipe( pipe_slow ); 9430 %} 9431 9432 instruct vsll8L_reg(vecZ dst, vecZ src, vecS shift) %{ 9433 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 9434 match(Set dst (LShiftVL src shift)); 9435 format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %} 9436 ins_encode %{ 9437 int vector_len = 2; 9438 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9439 %} 9440 ins_pipe( pipe_slow ); 9441 %} 9442 9443 instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 9444 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 9445 match(Set dst (LShiftVL src shift)); 9446 format %{ "vpsllq $dst,$src,$shift\t! 
left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ----------------------- LogicalRightShift -----------------------------------

// Shorts vector logical right shift produces an incorrect Java result for
// negative data, because Java code converts the short value into an int with
// sign extension before the shift. Char vectors are fine, since chars are
// unsigned values.
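// A minimal Java illustration of the pitfall (a hypothetical snippet, not
// code from this file): the scalar semantics widen the short first, so the
// bits shifted in at the top come from the sign extension, which a 16-bit
// packed shift such as psrlw cannot reproduce:
//
//   short s = (short)0x8000;          // -32768
//   int scalar = s >>> 1;             // widens to 0xFFFF8000, then shifts:
//                                     //   0x7FFFC000 -- high bits are ones
//   int packed = (s & 0xFFFF) >>> 1;  // what a 16-bit lane computes:
//                                     //   0x00004000 -- differs for negative s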
instruct vsrl2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_avx(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_evex(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_avx(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_evex(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed4S" %} 9618 ins_encode %{ 9619 int vector_len = 0; 9620 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9621 %} 9622 ins_pipe( pipe_slow ); 9623 %} 9624 9625 instruct vsrl4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{ 9626 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 9627 match(Set dst (URShiftVS dst shift)); 9628 effect(TEMP src); 9629 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 9630 ins_encode %{ 9631 int vector_len = 0; 9632 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9633 %} 9634 ins_pipe( pipe_slow ); 9635 %} 9636 9637 instruct vsrl8S(vecX dst, vecS shift) %{ 9638 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 9639 match(Set dst (URShiftVS dst shift)); 9640 format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} 9641 ins_encode %{ 9642 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); 9643 %} 9644 ins_pipe( pipe_slow ); 9645 %} 9646 9647 instruct vsrl8S_imm(vecX dst, immI8 shift) %{ 9648 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 9649 match(Set dst (URShiftVS dst shift)); 9650 format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} 9651 ins_encode %{ 9652 __ psrlw($dst$$XMMRegister, (int)$shift$$constant); 9653 %} 9654 ins_pipe( pipe_slow ); 9655 %} 9656 9657 instruct vsrl8S_reg_avx(vecX dst, vecX src, vecS shift) %{ 9658 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 9659 match(Set dst (URShiftVS src shift)); 9660 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 9661 ins_encode %{ 9662 int vector_len = 0; 9663 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9664 %} 9665 ins_pipe( pipe_slow ); 9666 %} 9667 9668 instruct vsrl8S_reg_evex(vecX dst, vecX src, vecS shift) %{ 9669 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 9670 match(Set dst (URShiftVS src shift)); 9671 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 9672 ins_encode %{ 9673 int vector_len = 0; 9674 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9675 %} 9676 ins_pipe( pipe_slow ); 9677 %} 9678 9679 instruct vsrl8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{ 9680 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 9681 match(Set dst (URShiftVS dst shift)); 9682 effect(TEMP src); 9683 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 9684 ins_encode %{ 9685 int vector_len = 0; 9686 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9687 %} 9688 ins_pipe( pipe_slow ); 9689 %} 9690 9691 instruct vsrl8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{ 9692 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 9693 match(Set dst (URShiftVS src shift)); 9694 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 9695 ins_encode %{ 9696 int vector_len = 0; 9697 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9698 %} 9699 ins_pipe( pipe_slow ); 9700 %} 9701 9702 instruct vsrl8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{ 9703 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 9704 match(Set dst (URShiftVS src shift)); 9705 format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed8S" %} 9706 ins_encode %{ 9707 int vector_len = 0; 9708 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9709 %} 9710 ins_pipe( pipe_slow ); 9711 %} 9712 9713 instruct vsrl8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{ 9714 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 9715 match(Set dst (URShiftVS dst shift)); 9716 effect(TEMP src); 9717 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 9718 ins_encode %{ 9719 int vector_len = 0; 9720 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9721 %} 9722 ins_pipe( pipe_slow ); 9723 %} 9724 9725 instruct vsrl16S_reg_avx(vecY dst, vecY src, vecS shift) %{ 9726 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 9727 match(Set dst (URShiftVS src shift)); 9728 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 9729 ins_encode %{ 9730 int vector_len = 1; 9731 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9732 %} 9733 ins_pipe( pipe_slow ); 9734 %} 9735 9736 instruct vsrl16S_reg_evex(vecY dst, vecY src, vecS shift) %{ 9737 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 9738 match(Set dst (URShiftVS src shift)); 9739 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 9740 ins_encode %{ 9741 int vector_len = 1; 9742 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9743 %} 9744 ins_pipe( pipe_slow ); 9745 %} 9746 9747 instruct vsrl16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{ 9748 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 9749 match(Set dst (URShiftVS dst shift)); 9750 effect(TEMP src); 9751 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 9752 ins_encode %{ 9753 int vector_len = 1; 9754 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9755 %} 9756 ins_pipe( pipe_slow ); 9757 %} 9758 9759 instruct vsrl16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{ 9760 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 9761 match(Set dst (URShiftVS src shift)); 9762 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 9763 ins_encode %{ 9764 int vector_len = 1; 9765 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9766 %} 9767 ins_pipe( pipe_slow ); 9768 %} 9769 9770 instruct vsrl16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{ 9771 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 9772 match(Set dst (URShiftVS src shift)); 9773 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 9774 ins_encode %{ 9775 int vector_len = 1; 9776 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9777 %} 9778 ins_pipe( pipe_slow ); 9779 %} 9780 9781 instruct vsrl16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{ 9782 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 9783 match(Set dst (URShiftVS dst shift)); 9784 effect(TEMP src); 9785 format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed16S" %} 9786 ins_encode %{ 9787 int vector_len = 1; 9788 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9789 %} 9790 ins_pipe( pipe_slow ); 9791 %} 9792 9793 instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{ 9794 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 9795 match(Set dst (URShiftVS src shift)); 9796 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %} 9797 ins_encode %{ 9798 int vector_len = 2; 9799 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9800 %} 9801 ins_pipe( pipe_slow ); 9802 %} 9803 9804 instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 9805 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 9806 match(Set dst (URShiftVS src shift)); 9807 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %} 9808 ins_encode %{ 9809 int vector_len = 2; 9810 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9811 %} 9812 ins_pipe( pipe_slow ); 9813 %} 9814 9815 // Integers vector logical right shift 9816 instruct vsrl2I(vecD dst, vecS shift) %{ 9817 predicate(n->as_Vector()->length() == 2); 9818 match(Set dst (URShiftVI dst shift)); 9819 format %{ "psrld $dst,$shift\t! logical right shift packed2I" %} 9820 ins_encode %{ 9821 __ psrld($dst$$XMMRegister, $shift$$XMMRegister); 9822 %} 9823 ins_pipe( pipe_slow ); 9824 %} 9825 9826 instruct vsrl2I_imm(vecD dst, immI8 shift) %{ 9827 predicate(n->as_Vector()->length() == 2); 9828 match(Set dst (URShiftVI dst shift)); 9829 format %{ "psrld $dst,$shift\t! logical right shift packed2I" %} 9830 ins_encode %{ 9831 __ psrld($dst$$XMMRegister, (int)$shift$$constant); 9832 %} 9833 ins_pipe( pipe_slow ); 9834 %} 9835 9836 instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{ 9837 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9838 match(Set dst (URShiftVI src shift)); 9839 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} 9840 ins_encode %{ 9841 int vector_len = 0; 9842 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9843 %} 9844 ins_pipe( pipe_slow ); 9845 %} 9846 9847 instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 9848 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9849 match(Set dst (URShiftVI src shift)); 9850 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} 9851 ins_encode %{ 9852 int vector_len = 0; 9853 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9854 %} 9855 ins_pipe( pipe_slow ); 9856 %} 9857 9858 instruct vsrl4I(vecX dst, vecS shift) %{ 9859 predicate(n->as_Vector()->length() == 4); 9860 match(Set dst (URShiftVI dst shift)); 9861 format %{ "psrld $dst,$shift\t! logical right shift packed4I" %} 9862 ins_encode %{ 9863 __ psrld($dst$$XMMRegister, $shift$$XMMRegister); 9864 %} 9865 ins_pipe( pipe_slow ); 9866 %} 9867 9868 instruct vsrl4I_imm(vecX dst, immI8 shift) %{ 9869 predicate(n->as_Vector()->length() == 4); 9870 match(Set dst (URShiftVI dst shift)); 9871 format %{ "psrld $dst,$shift\t! logical right shift packed4I" %} 9872 ins_encode %{ 9873 __ psrld($dst$$XMMRegister, (int)$shift$$constant); 9874 %} 9875 ins_pipe( pipe_slow ); 9876 %} 9877 9878 instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{ 9879 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 9880 match(Set dst (URShiftVI src shift)); 9881 format %{ "vpsrld $dst,$src,$shift\t! 
logical right shift packed4I" %} 9882 ins_encode %{ 9883 int vector_len = 0; 9884 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9885 %} 9886 ins_pipe( pipe_slow ); 9887 %} 9888 9889 instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 9890 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 9891 match(Set dst (URShiftVI src shift)); 9892 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %} 9893 ins_encode %{ 9894 int vector_len = 0; 9895 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9896 %} 9897 ins_pipe( pipe_slow ); 9898 %} 9899 9900 instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{ 9901 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 9902 match(Set dst (URShiftVI src shift)); 9903 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %} 9904 ins_encode %{ 9905 int vector_len = 1; 9906 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9907 %} 9908 ins_pipe( pipe_slow ); 9909 %} 9910 9911 instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 9912 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 9913 match(Set dst (URShiftVI src shift)); 9914 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %} 9915 ins_encode %{ 9916 int vector_len = 1; 9917 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9918 %} 9919 ins_pipe( pipe_slow ); 9920 %} 9921 9922 instruct vsrl16I_reg(vecZ dst, vecZ src, vecS shift) %{ 9923 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 9924 match(Set dst (URShiftVI src shift)); 9925 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %} 9926 ins_encode %{ 9927 int vector_len = 2; 9928 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9929 %} 9930 ins_pipe( pipe_slow ); 9931 %} 9932 9933 instruct vsrl16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 9934 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 9935 match(Set dst (URShiftVI src shift)); 9936 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %} 9937 ins_encode %{ 9938 int vector_len = 2; 9939 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9940 %} 9941 ins_pipe( pipe_slow ); 9942 %} 9943 9944 // Longs vector logical right shift 9945 instruct vsrl2L(vecX dst, vecS shift) %{ 9946 predicate(n->as_Vector()->length() == 2); 9947 match(Set dst (URShiftVL dst shift)); 9948 format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %} 9949 ins_encode %{ 9950 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); 9951 %} 9952 ins_pipe( pipe_slow ); 9953 %} 9954 9955 instruct vsrl2L_imm(vecX dst, immI8 shift) %{ 9956 predicate(n->as_Vector()->length() == 2); 9957 match(Set dst (URShiftVL dst shift)); 9958 format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %} 9959 ins_encode %{ 9960 __ psrlq($dst$$XMMRegister, (int)$shift$$constant); 9961 %} 9962 ins_pipe( pipe_slow ); 9963 %} 9964 9965 instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{ 9966 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9967 match(Set dst (URShiftVL src shift)); 9968 format %{ "vpsrlq $dst,$src,$shift\t! 
logical right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8L_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------- ArithmeticRightShift -----------------------------------

// Shorts/Chars vector arithmetic right shift
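// As with the left and logical right shifts above, each short-vector shape
// comes in three AVX flavors selected by the predicates: the _avx rules
// cover VEX-encoded targets (VM_Version::supports_avxonly() /
// supports_avx256only()), the _evex rules cover AVX-512 targets with BW
// (supports_avx512bw()), and the _evex_special rules cover AVX-512 targets
// without BW (supports_avx512nobw()), where the rule shifts $dst in place
// and names $src only as a TEMP.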
instruct vsra2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_avx(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_evex(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! 
arithmetic right shift packed4S" %} 10136 ins_encode %{ 10137 __ psraw($dst$$XMMRegister, (int)$shift$$constant); 10138 %} 10139 ins_pipe( pipe_slow ); 10140 %} 10141 10142 instruct vsra4S_reg_avx(vecD dst, vecD src, vecS shift) %{ 10143 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 10144 match(Set dst (RShiftVS src shift)); 10145 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 10146 ins_encode %{ 10147 int vector_len = 0; 10148 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10149 %} 10150 ins_pipe( pipe_slow ); 10151 %} 10152 10153 instruct vsra4S_reg_evex(vecD dst, vecD src, vecS shift) %{ 10154 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 10155 match(Set dst (RShiftVS src shift)); 10156 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 10157 ins_encode %{ 10158 int vector_len = 0; 10159 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10160 %} 10161 ins_pipe( pipe_slow ); 10162 %} 10163 10164 instruct vsra4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{ 10165 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 10166 match(Set dst (RShiftVS dst shift)); 10167 effect(TEMP src); 10168 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 10169 ins_encode %{ 10170 int vector_len = 0; 10171 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10172 %} 10173 ins_pipe( pipe_slow ); 10174 %} 10175 10176 instruct vsra4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{ 10177 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 10178 match(Set dst (RShiftVS src shift)); 10179 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 10180 ins_encode %{ 10181 int vector_len = 0; 10182 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10183 %} 10184 ins_pipe( pipe_slow ); 10185 %} 10186 10187 instruct vsra4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{ 10188 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 10189 match(Set dst (RShiftVS src shift)); 10190 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 10191 ins_encode %{ 10192 int vector_len = 0; 10193 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10194 %} 10195 ins_pipe( pipe_slow ); 10196 %} 10197 10198 instruct vsra4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{ 10199 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 10200 match(Set dst (RShiftVS dst shift)); 10201 effect(TEMP src); 10202 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 10203 ins_encode %{ 10204 int vector_len = 0; 10205 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10206 %} 10207 ins_pipe( pipe_slow ); 10208 %} 10209 10210 instruct vsra8S(vecX dst, vecS shift) %{ 10211 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 10212 match(Set dst (RShiftVS dst shift)); 10213 format %{ "psraw $dst,$shift\t! 
instruct vsra8S(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_avx(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_evex(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $dst$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsra16S_reg_avx(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_evex(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$dst,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$dst,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $dst$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
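// 512-bit (32S) short shifts exist only in the BW form below: without
// AVX512BW there is no 512-bit word shift at all, which is presumably why
// no *_avx or *_special variant appears at this size.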
instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integer vector arithmetic right shift
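// Dword shifts need no BW split: psrad is SSE2, and vpsrad covers the
// 128/256/512-bit widths under AVX/AVX2/AVX-512F respectively, so the int
// rules key only on the plain UseAVX level.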
instruct vsra2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// There are no long vector arithmetic right shift instructions.
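// (SSE/AVX provide psraw/psrad but no packed 64-bit arithmetic shift;
// vpsraq only appears with AVX-512 and is not matched here.)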

// --------------------------------- AND --------------------------------------
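// The bitwise rules below are lane-size agnostic, so they match on the
// vector's total size in bytes rather than its element count. The *_mem
// forms fold the load of the second operand into the instruction; they
// require AVX, whose three-operand VEX encoding is non-destructive (and
// tolerates unaligned memory operands).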

instruct vand4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (4 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (8 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (16 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- OR ---------------------------------------

instruct vor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (4 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (8 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (16 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- XOR --------------------------------------

instruct vxor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}