//
// Copyright (c) 2011, 2014, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers.  512-bit registers of 16 words each, labeled (a)-(p).
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperWord flags).
// For pre-EVEX enabled architectures:
//      XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX enabled architectures:
//      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
//
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM31 preserved across function calls
//              XMM0-XMM3 might hold parameters
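//
// Illustration (comment only, not processed by ADLC): each reg_def below
// names one 32-bit VMReg slot, so a 512-bit register contributes 16
// consecutive slots reached via successive next(n) calls on the base VMReg.
// A minimal standalone sketch of that slot arithmetic, assuming only this
// file's own naming scheme (words_per_xmm and slot_of are hypothetical
// names for this sketch, not HotSpot API):
//
//   const int words_per_xmm = 16;            // 32-bit words (a)-(p)
//   int slot_of(int xmm_index, char word) {  // word in 'a'..'p'
//     return xmm_index * words_per_xmm + (word - 'a');
//   }
//   // slot_of(0, 'a') == 0  -> XMM0  (a Float lives in word a)
//   // slot_of(0, 'b') == 1  -> XMM0b (a Double occupies words a and b)
//   // slot_of(1, 'a') == 16 -> XMM1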

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

#ifdef _WIN64

reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(15));

reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(15));

#else // _WIN64

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

#ifdef _LP64

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
xmm26->as_VMReg()->next(9)); 976 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10)); 977 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11)); 978 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12)); 979 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13)); 980 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14)); 981 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15)); 982 983 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()); 984 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1)); 985 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2)); 986 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3)); 987 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4)); 988 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5)); 989 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6)); 990 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7)); 991 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8)); 992 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9)); 993 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10)); 994 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11)); 995 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12)); 996 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13)); 997 reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14)); 998 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15)); 999 1000 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()); 1001 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1)); 1002 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2)); 1003 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3)); 1004 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4)); 1005 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5)); 1006 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6)); 1007 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7)); 1008 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8)); 1009 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9)); 1010 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10)); 1011 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11)); 1012 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12)); 1013 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13)); 1014 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14)); 1015 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15)); 1016 1017 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()); 1018 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1)); 1019 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2)); 1020 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3)); 1021 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4)); 1022 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5)); 1023 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6)); 1024 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7)); 1025 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8)); 1026 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9)); 1027 reg_def XMM29k( SOC, SOC, Op_RegF, 
29, xmm29->as_VMReg()->next(10)); 1028 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11)); 1029 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12)); 1030 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13)); 1031 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14)); 1032 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15)); 1033 1034 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()); 1035 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1)); 1036 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2)); 1037 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3)); 1038 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4)); 1039 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5)); 1040 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6)); 1041 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7)); 1042 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8)); 1043 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9)); 1044 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10)); 1045 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11)); 1046 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12)); 1047 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13)); 1048 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14)); 1049 reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15)); 1050 1051 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()); 1052 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1)); 1053 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2)); 1054 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3)); 1055 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4)); 1056 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5)); 1057 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6)); 1058 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7)); 1059 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8)); 1060 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9)); 1061 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10)); 1062 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11)); 1063 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12)); 1064 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13)); 1065 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14)); 1066 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15)); 1067 1068 #endif // _LP64 1069 1070 #endif // _WIN64 1071 1072 #ifdef _LP64 1073 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad()); 1074 #else 1075 reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad()); 1076 #endif // _LP64 1077 1078 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1079 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1080 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1081 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1082 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, 
XMM4o, XMM4p, 1083 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1084 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1085 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1086 #ifdef _LP64 1087 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1088 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1089 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1090 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1091 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1092 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1093 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1094 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1095 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 1096 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 1097 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 1098 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 1099 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 1100 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 1101 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 1102 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 1103 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 1104 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 1105 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 1106 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 1107 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 1108 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 1109 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1110 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1111 #endif 1112 ); 1113 1114 // flags 
allocation class should be last. 1115 alloc_class chunk2(RFLAGS); 1116 1117 // Singleton class for condition codes 1118 reg_class int_flags(RFLAGS); 1119 1120 // Class for pre evex float registers 1121 reg_class float_reg_legacy(XMM0, 1122 XMM1, 1123 XMM2, 1124 XMM3, 1125 XMM4, 1126 XMM5, 1127 XMM6, 1128 XMM7 1129 #ifdef _LP64 1130 ,XMM8, 1131 XMM9, 1132 XMM10, 1133 XMM11, 1134 XMM12, 1135 XMM13, 1136 XMM14, 1137 XMM15 1138 #endif 1139 ); 1140 1141 // Class for evex float registers 1142 reg_class float_reg_evex(XMM0, 1143 XMM1, 1144 XMM2, 1145 XMM3, 1146 XMM4, 1147 XMM5, 1148 XMM6, 1149 XMM7 1150 #ifdef _LP64 1151 ,XMM8, 1152 XMM9, 1153 XMM10, 1154 XMM11, 1155 XMM12, 1156 XMM13, 1157 XMM14, 1158 XMM15, 1159 XMM16, 1160 XMM17, 1161 XMM18, 1162 XMM19, 1163 XMM20, 1164 XMM21, 1165 XMM22, 1166 XMM23, 1167 XMM24, 1168 XMM25, 1169 XMM26, 1170 XMM27, 1171 XMM28, 1172 XMM29, 1173 XMM30, 1174 XMM31 1175 #endif 1176 ); 1177 1178 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} ); 1179 1180 // Class for pre evex double registers 1181 reg_class double_reg_legacy(XMM0, XMM0b, 1182 XMM1, XMM1b, 1183 XMM2, XMM2b, 1184 XMM3, XMM3b, 1185 XMM4, XMM4b, 1186 XMM5, XMM5b, 1187 XMM6, XMM6b, 1188 XMM7, XMM7b 1189 #ifdef _LP64 1190 ,XMM8, XMM8b, 1191 XMM9, XMM9b, 1192 XMM10, XMM10b, 1193 XMM11, XMM11b, 1194 XMM12, XMM12b, 1195 XMM13, XMM13b, 1196 XMM14, XMM14b, 1197 XMM15, XMM15b 1198 #endif 1199 ); 1200 1201 // Class for evex double registers 1202 reg_class double_reg_evex(XMM0, XMM0b, 1203 XMM1, XMM1b, 1204 XMM2, XMM2b, 1205 XMM3, XMM3b, 1206 XMM4, XMM4b, 1207 XMM5, XMM5b, 1208 XMM6, XMM6b, 1209 XMM7, XMM7b 1210 #ifdef _LP64 1211 ,XMM8, XMM8b, 1212 XMM9, XMM9b, 1213 XMM10, XMM10b, 1214 XMM11, XMM11b, 1215 XMM12, XMM12b, 1216 XMM13, XMM13b, 1217 XMM14, XMM14b, 1218 XMM15, XMM15b, 1219 XMM16, XMM16b, 1220 XMM17, XMM17b, 1221 XMM18, XMM18b, 1222 XMM19, XMM19b, 1223 XMM20, XMM20b, 1224 XMM21, XMM21b, 1225 XMM22, XMM22b, 1226 XMM23, XMM23b, 1227 XMM24, XMM24b, 1228 XMM25, XMM25b, 1229 XMM26, XMM26b, 1230 XMM27, XMM27b, 1231 XMM28, XMM28b, 1232 XMM29, XMM29b, 1233 XMM30, XMM30b, 1234 XMM31, XMM31b 1235 #endif 1236 ); 1237 1238 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} ); 1239 1240 // Class for pre evex 32bit vector registers 1241 reg_class vectors_reg_legacy(XMM0, 1242 XMM1, 1243 XMM2, 1244 XMM3, 1245 XMM4, 1246 XMM5, 1247 XMM6, 1248 XMM7 1249 #ifdef _LP64 1250 ,XMM8, 1251 XMM9, 1252 XMM10, 1253 XMM11, 1254 XMM12, 1255 XMM13, 1256 XMM14, 1257 XMM15 1258 #endif 1259 ); 1260 1261 // Class for evex 32bit vector registers 1262 reg_class vectors_reg_evex(XMM0, 1263 XMM1, 1264 XMM2, 1265 XMM3, 1266 XMM4, 1267 XMM5, 1268 XMM6, 1269 XMM7 1270 #ifdef _LP64 1271 ,XMM8, 1272 XMM9, 1273 XMM10, 1274 XMM11, 1275 XMM12, 1276 XMM13, 1277 XMM14, 1278 XMM15, 1279 XMM16, 1280 XMM17, 1281 XMM18, 1282 XMM19, 1283 XMM20, 1284 XMM21, 1285 XMM22, 1286 XMM23, 1287 XMM24, 1288 XMM25, 1289 XMM26, 1290 XMM27, 1291 XMM28, 1292 XMM29, 1293 XMM30, 1294 XMM31 1295 #endif 1296 ); 1297 1298 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} ); 1299 1300 // Class for all 64bit vector registers 1301 reg_class vectord_reg_legacy(XMM0, XMM0b, 1302 XMM1, XMM1b, 1303 XMM2, XMM2b, 1304 XMM3, XMM3b, 1305 XMM4, XMM4b, 1306 XMM5, XMM5b, 1307 XMM6, XMM6b, 1308 XMM7, XMM7b 1309 #ifdef _LP64 1310 ,XMM8, XMM8b, 1311 XMM9, XMM9b, 1312 XMM10, XMM10b, 1313 XMM11, XMM11b, 1314 XMM12, XMM12b, 1315 XMM13, XMM13b, 1316 
XMM14, XMM14b, 1317 XMM15, XMM15b 1318 #endif 1319 ); 1320 1321 // Class for all 64bit vector registers 1322 reg_class vectord_reg_evex(XMM0, XMM0b, 1323 XMM1, XMM1b, 1324 XMM2, XMM2b, 1325 XMM3, XMM3b, 1326 XMM4, XMM4b, 1327 XMM5, XMM5b, 1328 XMM6, XMM6b, 1329 XMM7, XMM7b 1330 #ifdef _LP64 1331 ,XMM8, XMM8b, 1332 XMM9, XMM9b, 1333 XMM10, XMM10b, 1334 XMM11, XMM11b, 1335 XMM12, XMM12b, 1336 XMM13, XMM13b, 1337 XMM14, XMM14b, 1338 XMM15, XMM15b, 1339 XMM16, XMM16b, 1340 XMM17, XMM17b, 1341 XMM18, XMM18b, 1342 XMM19, XMM19b, 1343 XMM20, XMM20b, 1344 XMM21, XMM21b, 1345 XMM22, XMM22b, 1346 XMM23, XMM23b, 1347 XMM24, XMM24b, 1348 XMM25, XMM25b, 1349 XMM26, XMM26b, 1350 XMM27, XMM27b, 1351 XMM28, XMM28b, 1352 XMM29, XMM29b, 1353 XMM30, XMM30b, 1354 XMM31, XMM31b 1355 #endif 1356 ); 1357 1358 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} ); 1359 1360 // Class for all 128bit vector registers 1361 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, 1362 XMM1, XMM1b, XMM1c, XMM1d, 1363 XMM2, XMM2b, XMM2c, XMM2d, 1364 XMM3, XMM3b, XMM3c, XMM3d, 1365 XMM4, XMM4b, XMM4c, XMM4d, 1366 XMM5, XMM5b, XMM5c, XMM5d, 1367 XMM6, XMM6b, XMM6c, XMM6d, 1368 XMM7, XMM7b, XMM7c, XMM7d 1369 #ifdef _LP64 1370 ,XMM8, XMM8b, XMM8c, XMM8d, 1371 XMM9, XMM9b, XMM9c, XMM9d, 1372 XMM10, XMM10b, XMM10c, XMM10d, 1373 XMM11, XMM11b, XMM11c, XMM11d, 1374 XMM12, XMM12b, XMM12c, XMM12d, 1375 XMM13, XMM13b, XMM13c, XMM13d, 1376 XMM14, XMM14b, XMM14c, XMM14d, 1377 XMM15, XMM15b, XMM15c, XMM15d 1378 #endif 1379 ); 1380 1381 // Class for all 128bit vector registers 1382 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, 1383 XMM1, XMM1b, XMM1c, XMM1d, 1384 XMM2, XMM2b, XMM2c, XMM2d, 1385 XMM3, XMM3b, XMM3c, XMM3d, 1386 XMM4, XMM4b, XMM4c, XMM4d, 1387 XMM5, XMM5b, XMM5c, XMM5d, 1388 XMM6, XMM6b, XMM6c, XMM6d, 1389 XMM7, XMM7b, XMM7c, XMM7d 1390 #ifdef _LP64 1391 ,XMM8, XMM8b, XMM8c, XMM8d, 1392 XMM9, XMM9b, XMM9c, XMM9d, 1393 XMM10, XMM10b, XMM10c, XMM10d, 1394 XMM11, XMM11b, XMM11c, XMM11d, 1395 XMM12, XMM12b, XMM12c, XMM12d, 1396 XMM13, XMM13b, XMM13c, XMM13d, 1397 XMM14, XMM14b, XMM14c, XMM14d, 1398 XMM15, XMM15b, XMM15c, XMM15d, 1399 XMM16, XMM16b, XMM16c, XMM16d, 1400 XMM17, XMM17b, XMM17c, XMM17d, 1401 XMM18, XMM18b, XMM18c, XMM18d, 1402 XMM19, XMM19b, XMM19c, XMM19d, 1403 XMM20, XMM20b, XMM20c, XMM20d, 1404 XMM21, XMM21b, XMM21c, XMM21d, 1405 XMM22, XMM22b, XMM22c, XMM22d, 1406 XMM23, XMM23b, XMM23c, XMM23d, 1407 XMM24, XMM24b, XMM24c, XMM24d, 1408 XMM25, XMM25b, XMM25c, XMM25d, 1409 XMM26, XMM26b, XMM26c, XMM26d, 1410 XMM27, XMM27b, XMM27c, XMM27d, 1411 XMM28, XMM28b, XMM28c, XMM28d, 1412 XMM29, XMM29b, XMM29c, XMM29d, 1413 XMM30, XMM30b, XMM30c, XMM30d, 1414 XMM31, XMM31b, XMM31c, XMM31d 1415 #endif 1416 ); 1417 1418 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} ); 1419 1420 // Class for all 256bit vector registers 1421 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1422 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1423 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1424 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 1425 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1426 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1427 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1428 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1429 #ifdef _LP64 1430 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1431 XMM9, XMM9b, 
XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1432 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1433 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1434 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1435 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1436 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1437 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h 1438 #endif 1439 ); 1440 1441 // Class for all 256bit vector registers 1442 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1443 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1444 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1445 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 1446 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1447 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1448 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1449 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1450 #ifdef _LP64 1451 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1452 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1453 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1454 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1455 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1456 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1457 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1458 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, 1459 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, 1460 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, 1461 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, 1462 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, 1463 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, 1464 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, 1465 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, 1466 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, 1467 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, 1468 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, 1469 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, 1470 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, 1471 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, 1472 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, 1473 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, 1474 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h 1475 #endif 1476 ); 1477 1478 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} ); 1479 1480 // Class for all 512bit vector registers 1481 reg_class vectorz_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1482 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1483 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1484 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1485 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1486 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, 
XMM5o, XMM5p, 1487 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1488 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1489 #ifdef _LP64 1490 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1491 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1492 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1493 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1494 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1495 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1496 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1497 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1498 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 1499 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 1500 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 1501 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 1502 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 1503 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 1504 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 1505 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 1506 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 1507 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 1508 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 1509 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 1510 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 1511 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 1512 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1513 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1514 #endif 1515 ); 1516 1517 %} 1518 1519 1520 //----------SOURCE BLOCK------------------------------------------------------- 1521 // This is a block of 
C++ code which provides values, functions, and 1522 // definitions necessary in the rest of the architecture description 1523 1524 source_hpp %{ 1525 // Header information of the source block. 1526 // Method declarations/definitions which are used outside 1527 // the ad-scope can conveniently be defined here. 1528 // 1529 // To keep related declarations/definitions/uses close together, 1530 // we switch between source %{ }% and source_hpp %{ }% freely as needed. 1531 1532 class NativeJump; 1533 1534 class CallStubImpl { 1535 1536 //-------------------------------------------------------------- 1537 //---< Used for optimization in Compile::shorten_branches >--- 1538 //-------------------------------------------------------------- 1539 1540 public: 1541 // Size of call trampoline stub. 1542 static uint size_call_trampoline() { 1543 return 0; // no call trampolines on this platform 1544 } 1545 1546 // number of relocations needed by a call trampoline stub 1547 static uint reloc_call_trampoline() { 1548 return 0; // no call trampolines on this platform 1549 } 1550 }; 1551 1552 class HandlerImpl { 1553 1554 public: 1555 1556 static int emit_exception_handler(CodeBuffer &cbuf); 1557 static int emit_deopt_handler(CodeBuffer& cbuf); 1558 1559 static uint size_exception_handler() { 1560 // NativeCall instruction size is the same as NativeJump. 1561 // The exception handler starts out as a jump and can be patched to 1562 // a call by deoptimization. (4932387) 1563 // Note that this value is also credited (in output.cpp) to 1564 // the size of the code section. 1565 return NativeJump::instruction_size; 1566 } 1567 1568 #ifdef _LP64 1569 static uint size_deopt_handler() { 1570 // three 5 byte instructions 1571 return 15; 1572 } 1573 #else 1574 static uint size_deopt_handler() { 1575 // NativeCall instruction size is the same as NativeJump. 1576 // The exception handler starts out as a jump and can be patched to 1577 // a call by deoptimization. (4932387) 1578 // Note that this value is also credited (in output.cpp) to 1579 // the size of the code section. 1580 return 5 + NativeJump::instruction_size; // pushl(); jmp; 1581 } 1582 #endif 1583 }; 1584 1585 %} // end source_hpp 1586 1587 source %{ 1588 1589 // Emit exception handler code. 1590 // Stuff framesize into a register and call a VM stub routine. 1591 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { 1592 1593 // Note that the code buffer's insts_mark is always relative to insts. 1594 // That's why we must use the macroassembler to generate a handler. 1595 MacroAssembler _masm(&cbuf); 1596 address base = __ start_a_stub(size_exception_handler()); 1597 if (base == NULL) { 1598 ciEnv::current()->record_failure("CodeCache is full"); 1599 return 0; // CodeBuffer::expand failed 1600 } 1601 int offset = __ offset(); 1602 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); 1603 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); 1604 __ end_a_stub(); 1605 return offset; 1606 } 1607 1608 // Emit deopt handler code. 1609 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { 1610 1611 // Note that the code buffer's insts_mark is always relative to insts. 1612 // That's why we must use the macroassembler to generate a handler.
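// Illustrative sketch (comment only, assuming standard x86 CALL semantics):
// the 64-bit path below needs the current PC on the stack without touching
// any register, since all registers may hold live values at a deopt site.
// CALL pushes the address of the instruction that follows it, so:
//   call next          ; pushes the address of 'next'
//   next:
//   sub [rsp], delta   ; rewind the pushed address back to 'the_pc'
// where delta is the code size between 'the_pc' and 'next', computed below
// as __ offset() - offset.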
1613 MacroAssembler _masm(&cbuf); 1614 address base = __ start_a_stub(size_deopt_handler()); 1615 if (base == NULL) { 1616 ciEnv::current()->record_failure("CodeCache is full"); 1617 return 0; // CodeBuffer::expand failed 1618 } 1619 int offset = __ offset(); 1620 1621 #ifdef _LP64 1622 address the_pc = (address) __ pc(); 1623 Label next; 1624 // push a "the_pc" on the stack without destroying any registers 1625 // as they all may be live. 1626 1627 // push address of "next" 1628 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32 1629 __ bind(next); 1630 // adjust it so it matches "the_pc" 1631 __ subptr(Address(rsp, 0), __ offset() - offset); 1632 #else 1633 InternalAddress here(__ pc()); 1634 __ pushptr(here.addr()); 1635 #endif 1636 1637 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); 1638 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); 1639 __ end_a_stub(); 1640 return offset; 1641 } 1642 1643 1644 //============================================================================= 1645 1646 // Float masks come from different places depending on platform. 1647 #ifdef _LP64 1648 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 1649 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 1650 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 1651 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 1652 #else 1653 static address float_signmask() { return (address)float_signmask_pool; } 1654 static address float_signflip() { return (address)float_signflip_pool; } 1655 static address double_signmask() { return (address)double_signmask_pool; } 1656 static address double_signflip() { return (address)double_signflip_pool; } 1657 #endif 1658 1659 1660 const bool Matcher::match_rule_supported(int opcode) { 1661 if (!has_match_rule(opcode)) 1662 return false; 1663 1664 bool ret_value = true; 1665 switch (opcode) { 1666 case Op_PopCountI: 1667 case Op_PopCountL: 1668 if (!UsePopCountInstruction) 1669 ret_value = false; 1670 break; 1671 case Op_MulVI: 1672 if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX 1673 ret_value = false; 1674 break; 1675 case Op_MulVL: 1676 case Op_MulReductionVL: 1677 if (VM_Version::supports_avx512dq() == false) 1678 ret_value = false; 1679 break; 1680 case Op_AddReductionVL: 1681 if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here 1682 ret_value = false; 1683 break; 1684 case Op_AddReductionVI: 1685 if (UseSSE < 3) // requires at least SSE3 1686 ret_value = false; 1687 break; 1688 case Op_MulReductionVI: 1689 if (UseSSE < 4) // requires at least SSE4 1690 ret_value = false; 1691 break; 1692 case Op_AddReductionVF: 1693 case Op_AddReductionVD: 1694 case Op_MulReductionVF: 1695 case Op_MulReductionVD: 1696 if (UseSSE < 1) // requires at least SSE 1697 ret_value = false; 1698 break; 1699 case Op_SqrtVD: 1700 if (UseAVX < 1) // enabled for AVX only 1701 ret_value = false; 1702 break; 1703 case Op_CompareAndSwapL: 1704 #ifdef _LP64 1705 case Op_CompareAndSwapP: 1706 #endif 1707 if (!VM_Version::supports_cx8()) 1708 ret_value = false; 1709 break; 1710 case Op_CMoveVD: 1711 if (UseAVX < 1 || UseAVX > 2) 1712 ret_value = false; 1713 break; 1714 } 1715 1716 return ret_value; // Per default match rules are supported. 
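// Example (illustrative): on a CPU without AVX-512DQ,
// VM_Version::supports_avx512dq() returns false, so Op_MulVL and
// Op_MulReductionVL are rejected above and C2 falls back to scalar code.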
1717 } 1718 1719 const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { 1720 // identify extra cases that we might want to provide match rules for 1721 // e.g. Op_ vector nodes and other intrinsics while guarding with vlen 1722 bool ret_value = match_rule_supported(opcode); 1723 if (ret_value) { 1724 switch (opcode) { 1725 case Op_AddVB: 1726 case Op_SubVB: 1727 if ((vlen == 64) && (VM_Version::supports_avx512bw() == false)) 1728 ret_value = false; 1729 break; 1730 case Op_URShiftVS: 1731 case Op_RShiftVS: 1732 case Op_LShiftVS: 1733 case Op_MulVS: 1734 case Op_AddVS: 1735 case Op_SubVS: 1736 if ((vlen == 32) && (VM_Version::supports_avx512bw() == false)) 1737 ret_value = false; 1738 break; 1739 case Op_CMoveVD: 1740 if (vlen != 4) 1741 ret_value = false; 1742 break; 1743 } 1744 } 1745 1746 return ret_value; // Per default match rules are supported. 1747 } 1748 1749 const int Matcher::float_pressure(int default_pressure_threshold) { 1750 int float_pressure_threshold = default_pressure_threshold; 1751 #ifdef _LP64 1752 if (UseAVX > 2) { 1753 // Increase pressure threshold on machines with AVX3 which have 1754 // 2x more XMM registers. 1755 float_pressure_threshold = default_pressure_threshold * 2; 1756 } 1757 #endif 1758 return float_pressure_threshold; 1759 } 1760 1761 // Max vector size in bytes. 0 if not supported. 1762 const int Matcher::vector_width_in_bytes(BasicType bt) { 1763 assert(is_java_primitive(bt), "only primitive type vectors"); 1764 if (UseSSE < 2) return 0; 1765 // SSE2 supports 128bit vectors for all types. 1766 // AVX2 supports 256bit vectors for all types. 1767 // AVX2/EVEX supports 512bit vectors for all types. 1768 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16; 1769 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. 1770 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) 1771 size = (UseAVX > 2) ? 64 : 32; 1772 // Use flag to limit vector size. 1773 size = MIN2(size,(int)MaxVectorSize); 1774 // Minimum 2 values in vector (or 4 for bytes). 1775 switch (bt) { 1776 case T_DOUBLE: 1777 case T_LONG: 1778 if (size < 16) return 0; 1779 break; 1780 case T_FLOAT: 1781 case T_INT: 1782 if (size < 8) return 0; 1783 break; 1784 case T_BOOLEAN: 1785 if (size < 4) return 0; 1786 break; 1787 case T_CHAR: 1788 if (size < 4) return 0; 1789 break; 1790 case T_BYTE: 1791 if (size < 4) return 0; 1792 break; 1793 case T_SHORT: 1794 if (size < 4) return 0; 1795 break; 1796 default: 1797 ShouldNotReachHere(); 1798 } 1799 return size; 1800 } 1801 1802 // Limits on vector size (number of elements) loaded into vector. 1803 const int Matcher::max_vector_size(const BasicType bt) { 1804 return vector_width_in_bytes(bt)/type2aelembytes(bt); 1805 } 1806 const int Matcher::min_vector_size(const BasicType bt) { 1807 int max_size = max_vector_size(bt); 1808 // Min size which can be loaded into vector is 4 bytes. 1809 int size = (type2aelembytes(bt) == 1) ? 4 : 2; 1810 return MIN2(size,max_size); 1811 } 1812 1813 // Vector ideal reg corresponding to specified size in bytes 1814 const int Matcher::vector_ideal_reg(int size) { 1815 assert(MaxVectorSize >= size, ""); 1816 switch(size) { 1817 case 4: return Op_VecS; 1818 case 8: return Op_VecD; 1819 case 16: return Op_VecX; 1820 case 32: return Op_VecY; 1821 case 64: return Op_VecZ; 1822 } 1823 ShouldNotReachHere(); 1824 return 0; 1825 } 1826 1827 // Only lowest bits of xmm reg are used for vector shift count.
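// Worked example (illustrative, assuming default flags): with UseAVX == 2,
// vector_width_in_bytes(T_INT) = MIN2((1 << 2) * 8, MaxVectorSize) = 32,
// so max_vector_size(T_INT) yields 8 elements and vector_ideal_reg(32)
// maps to Op_VecY. The shift count is different: because only the lowest
// bits of the xmm register are read, Op_VecS always suffices, as returned
// by the function below.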
1828 const int Matcher::vector_shift_count_ideal_reg(int size) { 1829 return Op_VecS; 1830 } 1831 1832 // x86 supports misaligned vectors store/load. 1833 const bool Matcher::misaligned_vectors_ok() { 1834 return !AlignVector; // can be changed by flag 1835 } 1836 1837 // x86 AES instructions are compatible with SunJCE expanded 1838 // keys, hence we do not need to pass the original key to stubs 1839 const bool Matcher::pass_original_key_for_aes() { 1840 return false; 1841 } 1842 1843 // Helper methods for MachSpillCopyNode::implementation(). 1844 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 1845 int src_hi, int dst_hi, uint ireg, outputStream* st) { 1846 // In 64-bit VM size calculation is very complex. Emitting instructions 1847 // into scratch buffer is used to get size in 64-bit VM. 1848 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 1849 assert(ireg == Op_VecS || // 32bit vector 1850 (src_lo & 1) == 0 && (src_lo + 1) == src_hi && 1851 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi, 1852 "no non-adjacent vector moves" ); 1853 if (cbuf) { 1854 MacroAssembler _masm(cbuf); 1855 int offset = __ offset(); 1856 switch (ireg) { 1857 case Op_VecS: // copy whole register 1858 case Op_VecD: 1859 case Op_VecX: 1860 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1861 break; 1862 case Op_VecY: 1863 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1864 break; 1865 case Op_VecZ: 1866 __ evmovdqul(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2); 1867 break; 1868 default: 1869 ShouldNotReachHere(); 1870 } 1871 int size = __ offset() - offset; 1872 #ifdef ASSERT 1873 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 1874 assert(!do_size || size == 4, "incorrect size calculation"); 1875 #endif 1876 return size; 1877 #ifndef PRODUCT 1878 } else if (!do_size) { 1879 switch (ireg) { 1880 case Op_VecS: 1881 case Op_VecD: 1882 case Op_VecX: 1883 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 1884 break; 1885 case Op_VecY: 1886 case Op_VecZ: 1887 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 1888 break; 1889 default: 1890 ShouldNotReachHere(); 1891 } 1892 #endif 1893 } 1894 // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix. 1895 return (UseAVX > 2) ? 6 : 4; 1896 } 1897 1898 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, 1899 int stack_offset, int reg, uint ireg, outputStream* st) { 1900 // In 64-bit VM size calculation is very complex. Emitting instructions 1901 // into scratch buffer is used to get size in 64-bit VM.
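// For example (illustrative): reloading an Op_VecY spill from the stack
// emits a 256-bit vmovdqu load, while an Op_VecS reload uses movdl on a
// single 32-bit slice; the store cases below mirror the load cases.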
1902 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 1903 if (cbuf) { 1904 MacroAssembler _masm(cbuf); 1905 int offset = __ offset(); 1906 if (is_load) { 1907 switch (ireg) { 1908 case Op_VecS: 1909 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1910 break; 1911 case Op_VecD: 1912 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1913 break; 1914 case Op_VecX: 1915 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1916 break; 1917 case Op_VecY: 1918 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1919 break; 1920 case Op_VecZ: 1921 __ evmovdqul(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); 1922 break; 1923 default: 1924 ShouldNotReachHere(); 1925 } 1926 } else { // store 1927 switch (ireg) { 1928 case Op_VecS: 1929 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1930 break; 1931 case Op_VecD: 1932 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1933 break; 1934 case Op_VecX: 1935 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1936 break; 1937 case Op_VecY: 1938 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1939 break; 1940 case Op_VecZ: 1941 __ evmovdqul(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); 1942 break; 1943 default: 1944 ShouldNotReachHere(); 1945 } 1946 } 1947 int size = __ offset() - offset; 1948 #ifdef ASSERT 1949 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4); 1950 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 
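// (Illustrative) the offset_size computed above: a stack_offset of 0x40
// encodes as a one-byte disp8, while 0x100 needs a four-byte disp32,
// counted as 6 when UseAVX > 2 because the EVEX prefix adds two more bytes
// to the expected instruction length checked below.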
1951 assert(!do_size || size == (5+offset_size), "incorrect size calculation"); 1952 #endif 1953 return size; 1954 #ifndef PRODUCT 1955 } else if (!do_size) { 1956 if (is_load) { 1957 switch (ireg) { 1958 case Op_VecS: 1959 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1960 break; 1961 case Op_VecD: 1962 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1963 break; 1964 case Op_VecX: 1965 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1966 break; 1967 case Op_VecY: 1968 case Op_VecZ: 1969 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1970 break; 1971 default: 1972 ShouldNotReachHere(); 1973 } 1974 } else { // store 1975 switch (ireg) { 1976 case Op_VecS: 1977 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1978 break; 1979 case Op_VecD: 1980 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1981 break; 1982 case Op_VecX: 1983 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1984 break; 1985 case Op_VecY: 1986 case Op_VecZ: 1987 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1988 break; 1989 default: 1990 ShouldNotReachHere(); 1991 } 1992 } 1993 #endif 1994 } 1995 bool is_single_byte = false; 1996 int vec_len = 0; 1997 if ((UseAVX > 2) && (stack_offset != 0)) { 1998 int tuple_type = Assembler::EVEX_FVM; 1999 int input_size = Assembler::EVEX_32bit; 2000 switch (ireg) { 2001 case Op_VecS: 2002 tuple_type = Assembler::EVEX_T1S; 2003 break; 2004 case Op_VecD: 2005 tuple_type = Assembler::EVEX_T1S; 2006 input_size = Assembler::EVEX_64bit; 2007 break; 2008 case Op_VecX: 2009 break; 2010 case Op_VecY: 2011 vec_len = 1; 2012 break; 2013 case Op_VecZ: 2014 vec_len = 2; 2015 break; 2016 } 2017 is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0); 2018 } 2019 int offset_size = 0; 2020 int size = 5; 2021 if (UseAVX > 2) { 2022 if (VM_Version::supports_avx512novl() && (vec_len == 2)) { 2023 offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); 2024 size += 2; // Need an additional two bytes for EVEX encoding 2025 } else if (VM_Version::supports_avx512novl() && (vec_len < 2)) { 2026 offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4); 2027 } else { 2028 offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); 2029 size += 2; // Need an additional two bytes for EVEX encoding 2030 } 2031 } else { 2032 offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4); 2033 } 2034 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 2035 return size+offset_size; 2036 } 2037 2038 static inline jfloat replicate4_imm(int con, int width) { 2039 // Load a constant of "width" (in bytes) and replicate it to fill 32bit. 2040 assert(width == 1 || width == 2, "only byte or short types here"); 2041 int bit_width = width * 8; 2042 jint val = con; 2043 val &= (1 << bit_width) - 1; // mask off sign bits 2044 while(bit_width < 32) { 2045 val |= (val << bit_width); 2046 bit_width <<= 1; 2047 } 2048 jfloat fval = *((jfloat*) &val); // coerce to float type 2049 return fval; 2050 } 2051 2052 static inline jdouble replicate8_imm(int con, int width) { 2053 // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
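// Worked example (illustrative): replicate8_imm(0x01, 1) masks the value
// to the byte 0x01, then doubles the pattern through 0x0101 and 0x01010101
// up to 0x0101010101010101, and returns those 64 bits reinterpreted as a
// jdouble for use as a constant-table entry.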
2054 assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here"); 2055 int bit_width = width * 8; 2056 jlong val = con; 2057 val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits 2058 while(bit_width < 64) { 2059 val |= (val << bit_width); 2060 bit_width <<= 1; 2061 } 2062 jdouble dval = *((jdouble*) &val); // coerce to double type 2063 return dval; 2064 } 2065 2066 #ifndef PRODUCT 2067 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 2068 st->print("nop \t# %d bytes pad for loops and calls", _count); 2069 } 2070 #endif 2071 2072 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { 2073 MacroAssembler _masm(&cbuf); 2074 __ nop(_count); 2075 } 2076 2077 uint MachNopNode::size(PhaseRegAlloc*) const { 2078 return _count; 2079 } 2080 2081 #ifndef PRODUCT 2082 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 2083 st->print("# breakpoint"); 2084 } 2085 #endif 2086 2087 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { 2088 MacroAssembler _masm(&cbuf); 2089 __ int3(); 2090 } 2091 2092 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 2093 return MachNode::size(ra_); 2094 } 2095 2096 %} 2097 2098 encode %{ 2099 2100 enc_class call_epilog %{ 2101 if (VerifyStackAtCalls) { 2102 // Check that stack depth is unchanged: find majik cookie on stack 2103 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 2104 MacroAssembler _masm(&cbuf); 2105 Label L; 2106 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); 2107 __ jccb(Assembler::equal, L); 2108 // Die if stack mismatch 2109 __ int3(); 2110 __ bind(L); 2111 } 2112 %} 2113 2114 %} 2115 2116 2117 //----------OPERANDS----------------------------------------------------------- 2118 // Operand definitions must precede instruction definitions for correct parsing 2119 // in the ADLC because operands constitute user defined types which are used in 2120 // instruction definitions. 2121 2122 // This one generically applies only for evex, so only one version 2123 operand vecZ() %{ 2124 constraint(ALLOC_IN_RC(vectorz_reg)); 2125 match(VecZ); 2126 2127 format %{ %} 2128 interface(REG_INTER); 2129 %} 2130 2131 // Comparison Code for FP conditional move 2132 operand cmpOp_vcmppd() %{ 2133 match(Bool); 2134 2135 predicate(n->as_Bool()->_test._test != BoolTest::overflow && 2136 n->as_Bool()->_test._test != BoolTest::no_overflow); 2137 format %{ "" %} 2138 interface(COND_INTER) %{ 2139 equal (0x0, "eq"); 2140 less (0x1, "lt"); 2141 less_equal (0x2, "le"); 2142 not_equal (0xC, "ne"); 2143 greater_equal(0xD, "ge"); 2144 greater (0xE, "gt"); 2145 //TODO cannot compile (adlc breaks) without two next lines with error: 2146 // x86_64.ad(13987) Syntax Error: :In operand cmpOp_vcmppd: Do not support this encode constant: ' %{ 2147 // equal' for overflow. 
2148 overflow (0x20, "o"); // not really supported by the instruction 2149 no_overflow (0x21, "no"); // not really supported by the instruction 2150 %} 2151 %} 2152 2153 2154 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 2155 2156 // ============================================================================ 2157 2158 instruct ShouldNotReachHere() %{ 2159 match(Halt); 2160 format %{ "int3\t# ShouldNotReachHere" %} 2161 ins_encode %{ 2162 __ int3(); 2163 %} 2164 ins_pipe(pipe_slow); 2165 %} 2166 2167 // ============================================================================ 2168 2169 instruct addF_reg(regF dst, regF src) %{ 2170 predicate((UseSSE>=1) && (UseAVX == 0)); 2171 match(Set dst (AddF dst src)); 2172 2173 format %{ "addss $dst, $src" %} 2174 ins_cost(150); 2175 ins_encode %{ 2176 __ addss($dst$$XMMRegister, $src$$XMMRegister); 2177 %} 2178 ins_pipe(pipe_slow); 2179 %} 2180 2181 instruct addF_mem(regF dst, memory src) %{ 2182 predicate((UseSSE>=1) && (UseAVX == 0)); 2183 match(Set dst (AddF dst (LoadF src))); 2184 2185 format %{ "addss $dst, $src" %} 2186 ins_cost(150); 2187 ins_encode %{ 2188 __ addss($dst$$XMMRegister, $src$$Address); 2189 %} 2190 ins_pipe(pipe_slow); 2191 %} 2192 2193 instruct addF_imm(regF dst, immF con) %{ 2194 predicate((UseSSE>=1) && (UseAVX == 0)); 2195 match(Set dst (AddF dst con)); 2196 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2197 ins_cost(150); 2198 ins_encode %{ 2199 __ addss($dst$$XMMRegister, $constantaddress($con)); 2200 %} 2201 ins_pipe(pipe_slow); 2202 %} 2203 2204 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 2205 predicate(UseAVX > 0); 2206 match(Set dst (AddF src1 src2)); 2207 2208 format %{ "vaddss $dst, $src1, $src2" %} 2209 ins_cost(150); 2210 ins_encode %{ 2211 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2212 %} 2213 ins_pipe(pipe_slow); 2214 %} 2215 2216 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 2217 predicate(UseAVX > 0); 2218 match(Set dst (AddF src1 (LoadF src2))); 2219 2220 format %{ "vaddss $dst, $src1, $src2" %} 2221 ins_cost(150); 2222 ins_encode %{ 2223 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2224 %} 2225 ins_pipe(pipe_slow); 2226 %} 2227 2228 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 2229 predicate(UseAVX > 0); 2230 match(Set dst (AddF src con)); 2231 2232 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2233 ins_cost(150); 2234 ins_encode %{ 2235 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2236 %} 2237 ins_pipe(pipe_slow); 2238 %} 2239 2240 instruct addD_reg(regD dst, regD src) %{ 2241 predicate((UseSSE>=2) && (UseAVX == 0)); 2242 match(Set dst (AddD dst src)); 2243 2244 format %{ "addsd $dst, $src" %} 2245 ins_cost(150); 2246 ins_encode %{ 2247 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 2248 %} 2249 ins_pipe(pipe_slow); 2250 %} 2251 2252 instruct addD_mem(regD dst, memory src) %{ 2253 predicate((UseSSE>=2) && (UseAVX == 0)); 2254 match(Set dst (AddD dst (LoadD src))); 2255 2256 format %{ "addsd $dst, $src" %} 2257 ins_cost(150); 2258 ins_encode %{ 2259 __ addsd($dst$$XMMRegister, $src$$Address); 2260 %} 2261 ins_pipe(pipe_slow); 2262 %} 2263 2264 instruct addD_imm(regD dst, immD con) %{ 2265 predicate((UseSSE>=2) && (UseAVX == 0)); 2266 match(Set dst (AddD dst con)); 2267 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2268 ins_cost(150); 
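// Note: $constantaddress($con) places the double in the constant table;
// the addsd emitted below folds the table load into its memory operand.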
2269 ins_encode %{ 2270 __ addsd($dst$$XMMRegister, $constantaddress($con)); 2271 %} 2272 ins_pipe(pipe_slow); 2273 %} 2274 2275 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 2276 predicate(UseAVX > 0); 2277 match(Set dst (AddD src1 src2)); 2278 2279 format %{ "vaddsd $dst, $src1, $src2" %} 2280 ins_cost(150); 2281 ins_encode %{ 2282 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2283 %} 2284 ins_pipe(pipe_slow); 2285 %} 2286 2287 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 2288 predicate(UseAVX > 0); 2289 match(Set dst (AddD src1 (LoadD src2))); 2290 2291 format %{ "vaddsd $dst, $src1, $src2" %} 2292 ins_cost(150); 2293 ins_encode %{ 2294 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2295 %} 2296 ins_pipe(pipe_slow); 2297 %} 2298 2299 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 2300 predicate(UseAVX > 0); 2301 match(Set dst (AddD src con)); 2302 2303 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2304 ins_cost(150); 2305 ins_encode %{ 2306 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2307 %} 2308 ins_pipe(pipe_slow); 2309 %} 2310 2311 instruct subF_reg(regF dst, regF src) %{ 2312 predicate((UseSSE>=1) && (UseAVX == 0)); 2313 match(Set dst (SubF dst src)); 2314 2315 format %{ "subss $dst, $src" %} 2316 ins_cost(150); 2317 ins_encode %{ 2318 __ subss($dst$$XMMRegister, $src$$XMMRegister); 2319 %} 2320 ins_pipe(pipe_slow); 2321 %} 2322 2323 instruct subF_mem(regF dst, memory src) %{ 2324 predicate((UseSSE>=1) && (UseAVX == 0)); 2325 match(Set dst (SubF dst (LoadF src))); 2326 2327 format %{ "subss $dst, $src" %} 2328 ins_cost(150); 2329 ins_encode %{ 2330 __ subss($dst$$XMMRegister, $src$$Address); 2331 %} 2332 ins_pipe(pipe_slow); 2333 %} 2334 2335 instruct subF_imm(regF dst, immF con) %{ 2336 predicate((UseSSE>=1) && (UseAVX == 0)); 2337 match(Set dst (SubF dst con)); 2338 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2339 ins_cost(150); 2340 ins_encode %{ 2341 __ subss($dst$$XMMRegister, $constantaddress($con)); 2342 %} 2343 ins_pipe(pipe_slow); 2344 %} 2345 2346 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 2347 predicate(UseAVX > 0); 2348 match(Set dst (SubF src1 src2)); 2349 2350 format %{ "vsubss $dst, $src1, $src2" %} 2351 ins_cost(150); 2352 ins_encode %{ 2353 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2354 %} 2355 ins_pipe(pipe_slow); 2356 %} 2357 2358 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 2359 predicate(UseAVX > 0); 2360 match(Set dst (SubF src1 (LoadF src2))); 2361 2362 format %{ "vsubss $dst, $src1, $src2" %} 2363 ins_cost(150); 2364 ins_encode %{ 2365 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2366 %} 2367 ins_pipe(pipe_slow); 2368 %} 2369 2370 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 2371 predicate(UseAVX > 0); 2372 match(Set dst (SubF src con)); 2373 2374 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2375 ins_cost(150); 2376 ins_encode %{ 2377 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2378 %} 2379 ins_pipe(pipe_slow); 2380 %} 2381 2382 instruct subD_reg(regD dst, regD src) %{ 2383 predicate((UseSSE>=2) && (UseAVX == 0)); 2384 match(Set dst (SubD dst src)); 2385 2386 format %{ "subsd $dst, $src" %} 2387 ins_cost(150); 2388 ins_encode %{ 2389 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 2390 %} 2391 
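// Note: the SSE subsd form above is destructive (dst -= src); the AVX
// vsubsd variants below take two explicit sources and write dst separately.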
ins_pipe(pipe_slow); 2392 %} 2393 2394 instruct subD_mem(regD dst, memory src) %{ 2395 predicate((UseSSE>=2) && (UseAVX == 0)); 2396 match(Set dst (SubD dst (LoadD src))); 2397 2398 format %{ "subsd $dst, $src" %} 2399 ins_cost(150); 2400 ins_encode %{ 2401 __ subsd($dst$$XMMRegister, $src$$Address); 2402 %} 2403 ins_pipe(pipe_slow); 2404 %} 2405 2406 instruct subD_imm(regD dst, immD con) %{ 2407 predicate((UseSSE>=2) && (UseAVX == 0)); 2408 match(Set dst (SubD dst con)); 2409 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2410 ins_cost(150); 2411 ins_encode %{ 2412 __ subsd($dst$$XMMRegister, $constantaddress($con)); 2413 %} 2414 ins_pipe(pipe_slow); 2415 %} 2416 2417 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 2418 predicate(UseAVX > 0); 2419 match(Set dst (SubD src1 src2)); 2420 2421 format %{ "vsubsd $dst, $src1, $src2" %} 2422 ins_cost(150); 2423 ins_encode %{ 2424 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2425 %} 2426 ins_pipe(pipe_slow); 2427 %} 2428 2429 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 2430 predicate(UseAVX > 0); 2431 match(Set dst (SubD src1 (LoadD src2))); 2432 2433 format %{ "vsubsd $dst, $src1, $src2" %} 2434 ins_cost(150); 2435 ins_encode %{ 2436 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2437 %} 2438 ins_pipe(pipe_slow); 2439 %} 2440 2441 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 2442 predicate(UseAVX > 0); 2443 match(Set dst (SubD src con)); 2444 2445 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2446 ins_cost(150); 2447 ins_encode %{ 2448 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2449 %} 2450 ins_pipe(pipe_slow); 2451 %} 2452 2453 instruct mulF_reg(regF dst, regF src) %{ 2454 predicate((UseSSE>=1) && (UseAVX == 0)); 2455 match(Set dst (MulF dst src)); 2456 2457 format %{ "mulss $dst, $src" %} 2458 ins_cost(150); 2459 ins_encode %{ 2460 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 2461 %} 2462 ins_pipe(pipe_slow); 2463 %} 2464 2465 instruct mulF_mem(regF dst, memory src) %{ 2466 predicate((UseSSE>=1) && (UseAVX == 0)); 2467 match(Set dst (MulF dst (LoadF src))); 2468 2469 format %{ "mulss $dst, $src" %} 2470 ins_cost(150); 2471 ins_encode %{ 2472 __ mulss($dst$$XMMRegister, $src$$Address); 2473 %} 2474 ins_pipe(pipe_slow); 2475 %} 2476 2477 instruct mulF_imm(regF dst, immF con) %{ 2478 predicate((UseSSE>=1) && (UseAVX == 0)); 2479 match(Set dst (MulF dst con)); 2480 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2481 ins_cost(150); 2482 ins_encode %{ 2483 __ mulss($dst$$XMMRegister, $constantaddress($con)); 2484 %} 2485 ins_pipe(pipe_slow); 2486 %} 2487 2488 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 2489 predicate(UseAVX > 0); 2490 match(Set dst (MulF src1 src2)); 2491 2492 format %{ "vmulss $dst, $src1, $src2" %} 2493 ins_cost(150); 2494 ins_encode %{ 2495 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2496 %} 2497 ins_pipe(pipe_slow); 2498 %} 2499 2500 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 2501 predicate(UseAVX > 0); 2502 match(Set dst (MulF src1 (LoadF src2))); 2503 2504 format %{ "vmulss $dst, $src1, $src2" %} 2505 ins_cost(150); 2506 ins_encode %{ 2507 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2508 %} 2509 ins_pipe(pipe_slow); 2510 %} 2511 2512 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 2513 predicate(UseAVX > 0); 
instruct mulF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src con));

  format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst src));

  format %{ "mulsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst (LoadD src)));

  format %{ "mulsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst con));
  format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 src2));

  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 (LoadD src2)));

  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src con));

  format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst src));

  format %{ "divss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst (LoadF src)));

  format %{ "divss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst con));
  format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
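// The predicate tiers follow the ISA: UseSSE >= 1 suffices for the
// single-precision (ss) scalar ops, the double-precision (sd) ops need
// UseSSE >= 2, and the explicit (UseAVX == 0) term keeps these legacy
// encodings from competing with the VEX forms when AVX is available.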
instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 src2));

  format %{ "vdivss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 (LoadF src2)));

  format %{ "vdivss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src con));

  format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst src));

  format %{ "divsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst (LoadD src)));

  format %{ "divsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst con));
  format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 src2));

  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 (LoadD src2)));

  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src con));

  format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct absF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AbsF dst));
  ins_cost(150);
  format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
  %}
  ins_pipe(pipe_slow);
%}
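// Abs is implemented by clearing the IEEE-754 sign bit: AND with
// 0x7fffffff (float) or 0x7fffffffffffffff (double) leaves exponent and
// mantissa untouched, so the masking works uniformly for zeros, NaNs and
// infinities without raising FP exceptions.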
instruct absF_reg_reg(regF dst, regF src) %{
  predicate(VM_Version::supports_avx256only());
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

#ifdef _LP64
instruct absF_reg_reg_evex(regF dst, regF src) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

instruct absF_reg_reg_evex_special(regF dst, regF src1, regF src2) %{
  predicate(VM_Version::supports_avx512novl());
  match(Set dst (AbsF src1));
  effect(TEMP src2);
  ins_cost(150);
  format %{ "vabsss $dst, $src1, $src2, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vabsss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
#else // _LP64
instruct absF_reg_reg_evex(regF dst, regF src) %{
  predicate(UseAVX > 2);
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
#endif

instruct absD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AbsD dst));
  ins_cost(150);
  format %{ "andpd $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg_reg(regD dst, regD src) %{
  predicate(VM_Version::supports_avx256only());
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

#ifdef _LP64
instruct absD_reg_reg_evex(regD dst, regD src) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg_reg_evex_special(regD dst, regD src1, regD src2) %{
  predicate(VM_Version::supports_avx512novl());
  match(Set dst (AbsD src1));
  effect(TEMP src2);
  ins_cost(150);
  format %{ "vabssd $dst, $src1, $src2, [0x7fffffffffffffff]\t# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vabssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
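// The *_evex_special variants cover AVX-512 parts without the VL extension:
// there the 128-bit EVEX-encoded vandps/vandpd are unavailable, so the
// vabsss/vabssd macro-assembler sequences are used instead, hence the extra
// scratch register (effect TEMP src2).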
format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 2863 "# abs double by sign masking" %} 2864 ins_encode %{ 2865 int vector_len = 0; 2866 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 2867 ExternalAddress(double_signmask()), vector_len); 2868 %} 2869 ins_pipe(pipe_slow); 2870 %} 2871 #endif 2872 2873 instruct negF_reg(regF dst) %{ 2874 predicate((UseSSE>=1) && (UseAVX == 0)); 2875 match(Set dst (NegF dst)); 2876 ins_cost(150); 2877 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 2878 ins_encode %{ 2879 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 2880 %} 2881 ins_pipe(pipe_slow); 2882 %} 2883 2884 instruct negF_reg_reg(regF dst, regF src) %{ 2885 predicate(UseAVX > 0); 2886 match(Set dst (NegF src)); 2887 ins_cost(150); 2888 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 2889 ins_encode %{ 2890 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister, 2891 ExternalAddress(float_signflip())); 2892 %} 2893 ins_pipe(pipe_slow); 2894 %} 2895 2896 instruct negD_reg(regD dst) %{ 2897 predicate((UseSSE>=2) && (UseAVX == 0)); 2898 match(Set dst (NegD dst)); 2899 ins_cost(150); 2900 format %{ "xorpd $dst, [0x8000000000000000]\t" 2901 "# neg double by sign flipping" %} 2902 ins_encode %{ 2903 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 2904 %} 2905 ins_pipe(pipe_slow); 2906 %} 2907 2908 instruct negD_reg_reg(regD dst, regD src) %{ 2909 predicate(UseAVX > 0); 2910 match(Set dst (NegD src)); 2911 ins_cost(150); 2912 format %{ "vnegatess $dst, $src, [0x8000000000000000]\t" 2913 "# neg double by sign flipping" %} 2914 ins_encode %{ 2915 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister, 2916 ExternalAddress(double_signflip())); 2917 %} 2918 ins_pipe(pipe_slow); 2919 %} 2920 2921 instruct sqrtF_reg(regF dst, regF src) %{ 2922 predicate(UseSSE>=1); 2923 match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); 2924 2925 format %{ "sqrtss $dst, $src" %} 2926 ins_cost(150); 2927 ins_encode %{ 2928 __ sqrtss($dst$$XMMRegister, $src$$XMMRegister); 2929 %} 2930 ins_pipe(pipe_slow); 2931 %} 2932 2933 instruct sqrtF_mem(regF dst, memory src) %{ 2934 predicate(UseSSE>=1); 2935 match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src))))); 2936 2937 format %{ "sqrtss $dst, $src" %} 2938 ins_cost(150); 2939 ins_encode %{ 2940 __ sqrtss($dst$$XMMRegister, $src$$Address); 2941 %} 2942 ins_pipe(pipe_slow); 2943 %} 2944 2945 instruct sqrtF_imm(regF dst, immF con) %{ 2946 predicate(UseSSE>=1); 2947 match(Set dst (ConvD2F (SqrtD (ConvF2D con)))); 2948 format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2949 ins_cost(150); 2950 ins_encode %{ 2951 __ sqrtss($dst$$XMMRegister, $constantaddress($con)); 2952 %} 2953 ins_pipe(pipe_slow); 2954 %} 2955 2956 instruct sqrtD_reg(regD dst, regD src) %{ 2957 predicate(UseSSE>=2); 2958 match(Set dst (SqrtD src)); 2959 2960 format %{ "sqrtsd $dst, $src" %} 2961 ins_cost(150); 2962 ins_encode %{ 2963 __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister); 2964 %} 2965 ins_pipe(pipe_slow); 2966 %} 2967 2968 instruct sqrtD_mem(regD dst, memory src) %{ 2969 predicate(UseSSE>=2); 2970 match(Set dst (SqrtD (LoadD src))); 2971 2972 format %{ "sqrtsd $dst, $src" %} 2973 ins_cost(150); 2974 ins_encode %{ 2975 __ sqrtsd($dst$$XMMRegister, $src$$Address); 2976 %} 2977 ins_pipe(pipe_slow); 2978 %} 2979 2980 instruct sqrtD_imm(regD dst, immD con) %{ 2981 predicate(UseSSE>=2); 2982 match(Set dst (SqrtD con)); 2983 format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: 
double=$con" %} 2984 ins_cost(150); 2985 ins_encode %{ 2986 __ sqrtsd($dst$$XMMRegister, $constantaddress($con)); 2987 %} 2988 ins_pipe(pipe_slow); 2989 %} 2990 2991 // ====================VECTOR INSTRUCTIONS===================================== 2992 2993 // Load vectors (4 bytes long) 2994 instruct loadV4(vecS dst, memory mem) %{ 2995 predicate(n->as_LoadVector()->memory_size() == 4); 2996 match(Set dst (LoadVector mem)); 2997 ins_cost(125); 2998 format %{ "movd $dst,$mem\t! load vector (4 bytes)" %} 2999 ins_encode %{ 3000 __ movdl($dst$$XMMRegister, $mem$$Address); 3001 %} 3002 ins_pipe( pipe_slow ); 3003 %} 3004 3005 // Load vectors (8 bytes long) 3006 instruct loadV8(vecD dst, memory mem) %{ 3007 predicate(n->as_LoadVector()->memory_size() == 8); 3008 match(Set dst (LoadVector mem)); 3009 ins_cost(125); 3010 format %{ "movq $dst,$mem\t! load vector (8 bytes)" %} 3011 ins_encode %{ 3012 __ movq($dst$$XMMRegister, $mem$$Address); 3013 %} 3014 ins_pipe( pipe_slow ); 3015 %} 3016 3017 // Load vectors (16 bytes long) 3018 instruct loadV16(vecX dst, memory mem) %{ 3019 predicate(n->as_LoadVector()->memory_size() == 16); 3020 match(Set dst (LoadVector mem)); 3021 ins_cost(125); 3022 format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %} 3023 ins_encode %{ 3024 __ movdqu($dst$$XMMRegister, $mem$$Address); 3025 %} 3026 ins_pipe( pipe_slow ); 3027 %} 3028 3029 // Load vectors (32 bytes long) 3030 instruct loadV32(vecY dst, memory mem) %{ 3031 predicate(n->as_LoadVector()->memory_size() == 32); 3032 match(Set dst (LoadVector mem)); 3033 ins_cost(125); 3034 format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %} 3035 ins_encode %{ 3036 __ vmovdqu($dst$$XMMRegister, $mem$$Address); 3037 %} 3038 ins_pipe( pipe_slow ); 3039 %} 3040 3041 // Load vectors (64 bytes long) 3042 instruct loadV64(vecZ dst, memory mem) %{ 3043 predicate(n->as_LoadVector()->memory_size() == 64); 3044 match(Set dst (LoadVector mem)); 3045 ins_cost(125); 3046 format %{ "vmovdqu $dst k0,$mem\t! load vector (64 bytes)" %} 3047 ins_encode %{ 3048 int vector_len = 2; 3049 __ evmovdqul($dst$$XMMRegister, $mem$$Address, vector_len); 3050 %} 3051 ins_pipe( pipe_slow ); 3052 %} 3053 3054 // Store vectors 3055 instruct storeV4(memory mem, vecS src) %{ 3056 predicate(n->as_StoreVector()->memory_size() == 4); 3057 match(Set mem (StoreVector mem src)); 3058 ins_cost(145); 3059 format %{ "movd $mem,$src\t! store vector (4 bytes)" %} 3060 ins_encode %{ 3061 __ movdl($mem$$Address, $src$$XMMRegister); 3062 %} 3063 ins_pipe( pipe_slow ); 3064 %} 3065 3066 instruct storeV8(memory mem, vecD src) %{ 3067 predicate(n->as_StoreVector()->memory_size() == 8); 3068 match(Set mem (StoreVector mem src)); 3069 ins_cost(145); 3070 format %{ "movq $mem,$src\t! store vector (8 bytes)" %} 3071 ins_encode %{ 3072 __ movq($mem$$Address, $src$$XMMRegister); 3073 %} 3074 ins_pipe( pipe_slow ); 3075 %} 3076 3077 instruct storeV16(memory mem, vecX src) %{ 3078 predicate(n->as_StoreVector()->memory_size() == 16); 3079 match(Set mem (StoreVector mem src)); 3080 ins_cost(145); 3081 format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %} 3082 ins_encode %{ 3083 __ movdqu($mem$$Address, $src$$XMMRegister); 3084 %} 3085 ins_pipe( pipe_slow ); 3086 %} 3087 3088 instruct storeV32(memory mem, vecY src) %{ 3089 predicate(n->as_StoreVector()->memory_size() == 32); 3090 match(Set mem (StoreVector mem src)); 3091 ins_cost(145); 3092 format %{ "vmovdqu $mem,$src\t! 
store vector (32 bytes)" %} 3093 ins_encode %{ 3094 __ vmovdqu($mem$$Address, $src$$XMMRegister); 3095 %} 3096 ins_pipe( pipe_slow ); 3097 %} 3098 3099 instruct storeV64(memory mem, vecZ src) %{ 3100 predicate(n->as_StoreVector()->memory_size() == 64); 3101 match(Set mem (StoreVector mem src)); 3102 ins_cost(145); 3103 format %{ "vmovdqu $mem k0,$src\t! store vector (64 bytes)" %} 3104 ins_encode %{ 3105 int vector_len = 2; 3106 __ evmovdqul($mem$$Address, $src$$XMMRegister, vector_len); 3107 %} 3108 ins_pipe( pipe_slow ); 3109 %} 3110 3111 // ====================LEGACY REPLICATE======================================= 3112 3113 instruct Repl4B_mem(vecS dst, memory mem) %{ 3114 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3115 match(Set dst (ReplicateB (LoadB mem))); 3116 format %{ "punpcklbw $dst,$mem\n\t" 3117 "pshuflw $dst,$dst,0x00\t! replicate4B" %} 3118 ins_encode %{ 3119 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3120 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3121 %} 3122 ins_pipe( pipe_slow ); 3123 %} 3124 3125 instruct Repl8B_mem(vecD dst, memory mem) %{ 3126 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3127 match(Set dst (ReplicateB (LoadB mem))); 3128 format %{ "punpcklbw $dst,$mem\n\t" 3129 "pshuflw $dst,$dst,0x00\t! replicate8B" %} 3130 ins_encode %{ 3131 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3132 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3133 %} 3134 ins_pipe( pipe_slow ); 3135 %} 3136 3137 instruct Repl16B(vecX dst, rRegI src) %{ 3138 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3139 match(Set dst (ReplicateB src)); 3140 format %{ "movd $dst,$src\n\t" 3141 "punpcklbw $dst,$dst\n\t" 3142 "pshuflw $dst,$dst,0x00\n\t" 3143 "punpcklqdq $dst,$dst\t! replicate16B" %} 3144 ins_encode %{ 3145 __ movdl($dst$$XMMRegister, $src$$Register); 3146 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3147 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3148 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3149 %} 3150 ins_pipe( pipe_slow ); 3151 %} 3152 3153 instruct Repl16B_mem(vecX dst, memory mem) %{ 3154 predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3155 match(Set dst (ReplicateB (LoadB mem))); 3156 format %{ "punpcklbw $dst,$mem\n\t" 3157 "pshuflw $dst,$dst,0x00\n\t" 3158 "punpcklqdq $dst,$dst\t! replicate16B" %} 3159 ins_encode %{ 3160 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3161 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3162 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3163 %} 3164 ins_pipe( pipe_slow ); 3165 %} 3166 3167 instruct Repl32B(vecY dst, rRegI src) %{ 3168 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3169 match(Set dst (ReplicateB src)); 3170 format %{ "movd $dst,$src\n\t" 3171 "punpcklbw $dst,$dst\n\t" 3172 "pshuflw $dst,$dst,0x00\n\t" 3173 "punpcklqdq $dst,$dst\n\t" 3174 "vinserti128h $dst,$dst,$dst\t! 
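// The legacy byte-replicate idiom, step by step: movd places the scalar in
// the low dword; punpcklbw with itself doubles each byte into a word;
// pshuflw 0x00 copies word 0 across the low quadword; punpcklqdq mirrors the
// low quadword into the high one (128 bits); and for the 256-bit forms
// vinserti128h then duplicates the low 128 bits into the upper half.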
replicate32B" %} 3175 ins_encode %{ 3176 __ movdl($dst$$XMMRegister, $src$$Register); 3177 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3178 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3179 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3180 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3181 %} 3182 ins_pipe( pipe_slow ); 3183 %} 3184 3185 instruct Repl32B_mem(vecY dst, memory mem) %{ 3186 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3187 match(Set dst (ReplicateB (LoadB mem))); 3188 format %{ "punpcklbw $dst,$mem\n\t" 3189 "pshuflw $dst,$dst,0x00\n\t" 3190 "punpcklqdq $dst,$dst\n\t" 3191 "vinserti128h $dst,$dst,$dst\t! replicate32B" %} 3192 ins_encode %{ 3193 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3194 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3195 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3196 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3197 %} 3198 ins_pipe( pipe_slow ); 3199 %} 3200 3201 instruct Repl16B_imm(vecX dst, immI con) %{ 3202 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3203 match(Set dst (ReplicateB con)); 3204 format %{ "movq $dst,[$constantaddress]\n\t" 3205 "punpcklqdq $dst,$dst\t! replicate16B($con)" %} 3206 ins_encode %{ 3207 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3208 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3209 %} 3210 ins_pipe( pipe_slow ); 3211 %} 3212 3213 instruct Repl32B_imm(vecY dst, immI con) %{ 3214 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3215 match(Set dst (ReplicateB con)); 3216 format %{ "movq $dst,[$constantaddress]\n\t" 3217 "punpcklqdq $dst,$dst\n\t" 3218 "vinserti128h $dst,$dst,$dst\t! lreplicate32B($con)" %} 3219 ins_encode %{ 3220 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3221 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3222 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3223 %} 3224 ins_pipe( pipe_slow ); 3225 %} 3226 3227 instruct Repl4S(vecD dst, rRegI src) %{ 3228 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vlbw()); 3229 match(Set dst (ReplicateS src)); 3230 format %{ "movd $dst,$src\n\t" 3231 "pshuflw $dst,$dst,0x00\t! replicate4S" %} 3232 ins_encode %{ 3233 __ movdl($dst$$XMMRegister, $src$$Register); 3234 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3235 %} 3236 ins_pipe( pipe_slow ); 3237 %} 3238 3239 instruct Repl4S_mem(vecD dst, memory mem) %{ 3240 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3241 match(Set dst (ReplicateS (LoadS mem))); 3242 format %{ "pshuflw $dst,$mem,0x00\t! replicate4S" %} 3243 ins_encode %{ 3244 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3245 %} 3246 ins_pipe( pipe_slow ); 3247 %} 3248 3249 instruct Repl8S(vecX dst, rRegI src) %{ 3250 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw()); 3251 match(Set dst (ReplicateS src)); 3252 format %{ "movd $dst,$src\n\t" 3253 "pshuflw $dst,$dst,0x00\n\t" 3254 "punpcklqdq $dst,$dst\t! 
replicate8S" %} 3255 ins_encode %{ 3256 __ movdl($dst$$XMMRegister, $src$$Register); 3257 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3258 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3259 %} 3260 ins_pipe( pipe_slow ); 3261 %} 3262 3263 instruct Repl8S_mem(vecX dst, memory mem) %{ 3264 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3265 match(Set dst (ReplicateS (LoadS mem))); 3266 format %{ "pshuflw $dst,$mem,0x00\n\t" 3267 "punpcklqdq $dst,$dst\t! replicate8S" %} 3268 ins_encode %{ 3269 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3270 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3271 %} 3272 ins_pipe( pipe_slow ); 3273 %} 3274 3275 instruct Repl8S_imm(vecX dst, immI con) %{ 3276 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw()); 3277 match(Set dst (ReplicateS con)); 3278 format %{ "movq $dst,[$constantaddress]\n\t" 3279 "punpcklqdq $dst,$dst\t! replicate8S($con)" %} 3280 ins_encode %{ 3281 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3282 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3283 %} 3284 ins_pipe( pipe_slow ); 3285 %} 3286 3287 instruct Repl16S(vecY dst, rRegI src) %{ 3288 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3289 match(Set dst (ReplicateS src)); 3290 format %{ "movd $dst,$src\n\t" 3291 "pshuflw $dst,$dst,0x00\n\t" 3292 "punpcklqdq $dst,$dst\n\t" 3293 "vinserti128h $dst,$dst,$dst\t! replicate16S" %} 3294 ins_encode %{ 3295 __ movdl($dst$$XMMRegister, $src$$Register); 3296 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3297 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3298 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3299 %} 3300 ins_pipe( pipe_slow ); 3301 %} 3302 3303 instruct Repl16S_mem(vecY dst, memory mem) %{ 3304 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3305 match(Set dst (ReplicateS (LoadS mem))); 3306 format %{ "pshuflw $dst,$mem,0x00\n\t" 3307 "punpcklqdq $dst,$dst\n\t" 3308 "vinserti128h $dst,$dst,$dst\t! replicate16S" %} 3309 ins_encode %{ 3310 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3311 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3312 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3313 %} 3314 ins_pipe( pipe_slow ); 3315 %} 3316 3317 instruct Repl16S_imm(vecY dst, immI con) %{ 3318 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3319 match(Set dst (ReplicateS con)); 3320 format %{ "movq $dst,[$constantaddress]\n\t" 3321 "punpcklqdq $dst,$dst\n\t" 3322 "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %} 3323 ins_encode %{ 3324 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3325 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3326 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3327 %} 3328 ins_pipe( pipe_slow ); 3329 %} 3330 3331 instruct Repl4I(vecX dst, rRegI src) %{ 3332 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3333 match(Set dst (ReplicateI src)); 3334 format %{ "movd $dst,$src\n\t" 3335 "pshufd $dst,$dst,0x00\t! 
replicate4I" %} 3336 ins_encode %{ 3337 __ movdl($dst$$XMMRegister, $src$$Register); 3338 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3339 %} 3340 ins_pipe( pipe_slow ); 3341 %} 3342 3343 instruct Repl4I_mem(vecX dst, memory mem) %{ 3344 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3345 match(Set dst (ReplicateI (LoadI mem))); 3346 format %{ "pshufd $dst,$mem,0x00\t! replicate4I" %} 3347 ins_encode %{ 3348 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3349 %} 3350 ins_pipe( pipe_slow ); 3351 %} 3352 3353 instruct Repl8I(vecY dst, rRegI src) %{ 3354 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3355 match(Set dst (ReplicateI src)); 3356 format %{ "movd $dst,$src\n\t" 3357 "pshufd $dst,$dst,0x00\n\t" 3358 "vinserti128h $dst,$dst,$dst\t! replicate8I" %} 3359 ins_encode %{ 3360 __ movdl($dst$$XMMRegister, $src$$Register); 3361 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3362 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3363 %} 3364 ins_pipe( pipe_slow ); 3365 %} 3366 3367 instruct Repl8I_mem(vecY dst, memory mem) %{ 3368 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3369 match(Set dst (ReplicateI (LoadI mem))); 3370 format %{ "pshufd $dst,$mem,0x00\n\t" 3371 "vinserti128h $dst,$dst,$dst\t! replicate8I" %} 3372 ins_encode %{ 3373 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3374 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3375 %} 3376 ins_pipe( pipe_slow ); 3377 %} 3378 3379 instruct Repl4I_imm(vecX dst, immI con) %{ 3380 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3381 match(Set dst (ReplicateI con)); 3382 format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t" 3383 "punpcklqdq $dst,$dst" %} 3384 ins_encode %{ 3385 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3386 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3387 %} 3388 ins_pipe( pipe_slow ); 3389 %} 3390 3391 instruct Repl8I_imm(vecY dst, immI con) %{ 3392 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3393 match(Set dst (ReplicateI con)); 3394 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 3395 "punpcklqdq $dst,$dst\n\t" 3396 "vinserti128h $dst,$dst,$dst" %} 3397 ins_encode %{ 3398 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3399 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3400 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3401 %} 3402 ins_pipe( pipe_slow ); 3403 %} 3404 3405 // Long could be loaded into xmm register directly from memory. 3406 instruct Repl2L_mem(vecX dst, memory mem) %{ 3407 predicate(n->as_Vector()->length() == 2 && !VM_Version::supports_avx512vlbw()); 3408 match(Set dst (ReplicateL (LoadL mem))); 3409 format %{ "movq $dst,$mem\n\t" 3410 "punpcklqdq $dst,$dst\t! replicate2L" %} 3411 ins_encode %{ 3412 __ movq($dst$$XMMRegister, $mem$$Address); 3413 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3414 %} 3415 ins_pipe( pipe_slow ); 3416 %} 3417 3418 // Replicate long (8 byte) scalar to be vector 3419 #ifdef _LP64 3420 instruct Repl4L(vecY dst, rRegL src) %{ 3421 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3422 match(Set dst (ReplicateL src)); 3423 format %{ "movdq $dst,$src\n\t" 3424 "punpcklqdq $dst,$dst\n\t" 3425 "vinserti128h $dst,$dst,$dst\t! 
replicate4L" %} 3426 ins_encode %{ 3427 __ movdq($dst$$XMMRegister, $src$$Register); 3428 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3429 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3430 %} 3431 ins_pipe( pipe_slow ); 3432 %} 3433 #else // _LP64 3434 instruct Repl4L(vecY dst, eRegL src, regD tmp) %{ 3435 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3436 match(Set dst (ReplicateL src)); 3437 effect(TEMP dst, USE src, TEMP tmp); 3438 format %{ "movdl $dst,$src.lo\n\t" 3439 "movdl $tmp,$src.hi\n\t" 3440 "punpckldq $dst,$tmp\n\t" 3441 "punpcklqdq $dst,$dst\n\t" 3442 "vinserti128h $dst,$dst,$dst\t! replicate4L" %} 3443 ins_encode %{ 3444 __ movdl($dst$$XMMRegister, $src$$Register); 3445 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3446 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3447 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3448 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3449 %} 3450 ins_pipe( pipe_slow ); 3451 %} 3452 #endif // _LP64 3453 3454 instruct Repl4L_imm(vecY dst, immL con) %{ 3455 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3456 match(Set dst (ReplicateL con)); 3457 format %{ "movq $dst,[$constantaddress]\n\t" 3458 "punpcklqdq $dst,$dst\n\t" 3459 "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %} 3460 ins_encode %{ 3461 __ movq($dst$$XMMRegister, $constantaddress($con)); 3462 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3463 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3464 %} 3465 ins_pipe( pipe_slow ); 3466 %} 3467 3468 instruct Repl4L_mem(vecY dst, memory mem) %{ 3469 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3470 match(Set dst (ReplicateL (LoadL mem))); 3471 format %{ "movq $dst,$mem\n\t" 3472 "punpcklqdq $dst,$dst\n\t" 3473 "vinserti128h $dst,$dst,$dst\t! replicate4L" %} 3474 ins_encode %{ 3475 __ movq($dst$$XMMRegister, $mem$$Address); 3476 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3477 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3478 %} 3479 ins_pipe( pipe_slow ); 3480 %} 3481 3482 instruct Repl2F_mem(vecD dst, memory mem) %{ 3483 predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3484 match(Set dst (ReplicateF (LoadF mem))); 3485 format %{ "pshufd $dst,$mem,0x00\t! replicate2F" %} 3486 ins_encode %{ 3487 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3488 %} 3489 ins_pipe( pipe_slow ); 3490 %} 3491 3492 instruct Repl4F_mem(vecX dst, memory mem) %{ 3493 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3494 match(Set dst (ReplicateF (LoadF mem))); 3495 format %{ "pshufd $dst,$mem,0x00\t! replicate4F" %} 3496 ins_encode %{ 3497 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3498 %} 3499 ins_pipe( pipe_slow ); 3500 %} 3501 3502 instruct Repl8F(vecY dst, regF src) %{ 3503 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3504 match(Set dst (ReplicateF src)); 3505 format %{ "pshufd $dst,$src,0x00\n\t" 3506 "vinsertf128h $dst,$dst,$dst\t! 
replicate8F" %} 3507 ins_encode %{ 3508 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3509 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3510 %} 3511 ins_pipe( pipe_slow ); 3512 %} 3513 3514 instruct Repl8F_mem(vecY dst, memory mem) %{ 3515 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3516 match(Set dst (ReplicateF (LoadF mem))); 3517 format %{ "pshufd $dst,$mem,0x00\n\t" 3518 "vinsertf128h $dst,$dst,$dst\t! replicate8F" %} 3519 ins_encode %{ 3520 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3521 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3522 %} 3523 ins_pipe( pipe_slow ); 3524 %} 3525 3526 instruct Repl2F_zero(vecD dst, immF0 zero) %{ 3527 predicate(n->as_Vector()->length() == 2 && UseAVX < 3); 3528 match(Set dst (ReplicateF zero)); 3529 format %{ "xorps $dst,$dst\t! replicate2F zero" %} 3530 ins_encode %{ 3531 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 3532 %} 3533 ins_pipe( fpu_reg_reg ); 3534 %} 3535 3536 instruct Repl4F_zero(vecX dst, immF0 zero) %{ 3537 predicate(n->as_Vector()->length() == 4 && UseAVX < 3); 3538 match(Set dst (ReplicateF zero)); 3539 format %{ "xorps $dst,$dst\t! replicate4F zero" %} 3540 ins_encode %{ 3541 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 3542 %} 3543 ins_pipe( fpu_reg_reg ); 3544 %} 3545 3546 instruct Repl8F_zero(vecY dst, immF0 zero) %{ 3547 predicate(n->as_Vector()->length() == 8 && UseAVX < 3); 3548 match(Set dst (ReplicateF zero)); 3549 format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %} 3550 ins_encode %{ 3551 int vector_len = 1; 3552 __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3553 %} 3554 ins_pipe( fpu_reg_reg ); 3555 %} 3556 3557 instruct Repl2D_mem(vecX dst, memory mem) %{ 3558 predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3559 match(Set dst (ReplicateD (LoadD mem))); 3560 format %{ "pshufd $dst,$mem,0x44\t! replicate2D" %} 3561 ins_encode %{ 3562 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3563 %} 3564 ins_pipe( pipe_slow ); 3565 %} 3566 3567 instruct Repl4D(vecY dst, regD src) %{ 3568 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3569 match(Set dst (ReplicateD src)); 3570 format %{ "pshufd $dst,$src,0x44\n\t" 3571 "vinsertf128h $dst,$dst,$dst\t! replicate4D" %} 3572 ins_encode %{ 3573 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3574 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3575 %} 3576 ins_pipe( pipe_slow ); 3577 %} 3578 3579 instruct Repl4D_mem(vecY dst, memory mem) %{ 3580 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3581 match(Set dst (ReplicateD (LoadD mem))); 3582 format %{ "pshufd $dst,$mem,0x44\n\t" 3583 "vinsertf128h $dst,$dst,$dst\t! replicate4D" %} 3584 ins_encode %{ 3585 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3586 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3587 %} 3588 ins_pipe( pipe_slow ); 3589 %} 3590 3591 // Replicate double (8 byte) scalar zero to be vector 3592 instruct Repl2D_zero(vecX dst, immD0 zero) %{ 3593 predicate(n->as_Vector()->length() == 2 && UseAVX < 3); 3594 match(Set dst (ReplicateD zero)); 3595 format %{ "xorpd $dst,$dst\t! 
replicate2D zero" %} 3596 ins_encode %{ 3597 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister); 3598 %} 3599 ins_pipe( fpu_reg_reg ); 3600 %} 3601 3602 instruct Repl4D_zero(vecY dst, immD0 zero) %{ 3603 predicate(n->as_Vector()->length() == 4 && UseAVX < 3); 3604 match(Set dst (ReplicateD zero)); 3605 format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %} 3606 ins_encode %{ 3607 int vector_len = 1; 3608 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3609 %} 3610 ins_pipe( fpu_reg_reg ); 3611 %} 3612 3613 // ====================GENERIC REPLICATE========================================== 3614 3615 // Replicate byte scalar to be vector 3616 instruct Repl4B(vecS dst, rRegI src) %{ 3617 predicate(n->as_Vector()->length() == 4); 3618 match(Set dst (ReplicateB src)); 3619 format %{ "movd $dst,$src\n\t" 3620 "punpcklbw $dst,$dst\n\t" 3621 "pshuflw $dst,$dst,0x00\t! replicate4B" %} 3622 ins_encode %{ 3623 __ movdl($dst$$XMMRegister, $src$$Register); 3624 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3625 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3626 %} 3627 ins_pipe( pipe_slow ); 3628 %} 3629 3630 instruct Repl8B(vecD dst, rRegI src) %{ 3631 predicate(n->as_Vector()->length() == 8); 3632 match(Set dst (ReplicateB src)); 3633 format %{ "movd $dst,$src\n\t" 3634 "punpcklbw $dst,$dst\n\t" 3635 "pshuflw $dst,$dst,0x00\t! replicate8B" %} 3636 ins_encode %{ 3637 __ movdl($dst$$XMMRegister, $src$$Register); 3638 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3639 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3640 %} 3641 ins_pipe( pipe_slow ); 3642 %} 3643 3644 // Replicate byte scalar immediate to be vector by loading from const table. 3645 instruct Repl4B_imm(vecS dst, immI con) %{ 3646 predicate(n->as_Vector()->length() == 4); 3647 match(Set dst (ReplicateB con)); 3648 format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %} 3649 ins_encode %{ 3650 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1))); 3651 %} 3652 ins_pipe( pipe_slow ); 3653 %} 3654 3655 instruct Repl8B_imm(vecD dst, immI con) %{ 3656 predicate(n->as_Vector()->length() == 8); 3657 match(Set dst (ReplicateB con)); 3658 format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %} 3659 ins_encode %{ 3660 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3661 %} 3662 ins_pipe( pipe_slow ); 3663 %} 3664 3665 // Replicate byte scalar zero to be vector 3666 instruct Repl4B_zero(vecS dst, immI0 zero) %{ 3667 predicate(n->as_Vector()->length() == 4); 3668 match(Set dst (ReplicateB zero)); 3669 format %{ "pxor $dst,$dst\t! replicate4B zero" %} 3670 ins_encode %{ 3671 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3672 %} 3673 ins_pipe( fpu_reg_reg ); 3674 %} 3675 3676 instruct Repl8B_zero(vecD dst, immI0 zero) %{ 3677 predicate(n->as_Vector()->length() == 8); 3678 match(Set dst (ReplicateB zero)); 3679 format %{ "pxor $dst,$dst\t! replicate8B zero" %} 3680 ins_encode %{ 3681 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3682 %} 3683 ins_pipe( fpu_reg_reg ); 3684 %} 3685 3686 instruct Repl16B_zero(vecX dst, immI0 zero) %{ 3687 predicate(n->as_Vector()->length() == 16); 3688 match(Set dst (ReplicateB zero)); 3689 format %{ "pxor $dst,$dst\t! 
replicate16B zero" %} 3690 ins_encode %{ 3691 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3692 %} 3693 ins_pipe( fpu_reg_reg ); 3694 %} 3695 3696 instruct Repl32B_zero(vecY dst, immI0 zero) %{ 3697 predicate(n->as_Vector()->length() == 32); 3698 match(Set dst (ReplicateB zero)); 3699 format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %} 3700 ins_encode %{ 3701 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 3702 int vector_len = 1; 3703 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3704 %} 3705 ins_pipe( fpu_reg_reg ); 3706 %} 3707 3708 // Replicate char/short (2 byte) scalar to be vector 3709 instruct Repl2S(vecS dst, rRegI src) %{ 3710 predicate(n->as_Vector()->length() == 2); 3711 match(Set dst (ReplicateS src)); 3712 format %{ "movd $dst,$src\n\t" 3713 "pshuflw $dst,$dst,0x00\t! replicate2S" %} 3714 ins_encode %{ 3715 __ movdl($dst$$XMMRegister, $src$$Register); 3716 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3717 %} 3718 ins_pipe( fpu_reg_reg ); 3719 %} 3720 3721 // Replicate char/short (2 byte) scalar immediate to be vector by loading from const table. 3722 instruct Repl2S_imm(vecS dst, immI con) %{ 3723 predicate(n->as_Vector()->length() == 2); 3724 match(Set dst (ReplicateS con)); 3725 format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %} 3726 ins_encode %{ 3727 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2))); 3728 %} 3729 ins_pipe( fpu_reg_reg ); 3730 %} 3731 3732 instruct Repl4S_imm(vecD dst, immI con) %{ 3733 predicate(n->as_Vector()->length() == 4); 3734 match(Set dst (ReplicateS con)); 3735 format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %} 3736 ins_encode %{ 3737 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3738 %} 3739 ins_pipe( fpu_reg_reg ); 3740 %} 3741 3742 // Replicate char/short (2 byte) scalar zero to be vector 3743 instruct Repl2S_zero(vecS dst, immI0 zero) %{ 3744 predicate(n->as_Vector()->length() == 2); 3745 match(Set dst (ReplicateS zero)); 3746 format %{ "pxor $dst,$dst\t! replicate2S zero" %} 3747 ins_encode %{ 3748 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3749 %} 3750 ins_pipe( fpu_reg_reg ); 3751 %} 3752 3753 instruct Repl4S_zero(vecD dst, immI0 zero) %{ 3754 predicate(n->as_Vector()->length() == 4); 3755 match(Set dst (ReplicateS zero)); 3756 format %{ "pxor $dst,$dst\t! replicate4S zero" %} 3757 ins_encode %{ 3758 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3759 %} 3760 ins_pipe( fpu_reg_reg ); 3761 %} 3762 3763 instruct Repl8S_zero(vecX dst, immI0 zero) %{ 3764 predicate(n->as_Vector()->length() == 8); 3765 match(Set dst (ReplicateS zero)); 3766 format %{ "pxor $dst,$dst\t! replicate8S zero" %} 3767 ins_encode %{ 3768 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3769 %} 3770 ins_pipe( fpu_reg_reg ); 3771 %} 3772 3773 instruct Repl16S_zero(vecY dst, immI0 zero) %{ 3774 predicate(n->as_Vector()->length() == 16); 3775 match(Set dst (ReplicateS zero)); 3776 format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %} 3777 ins_encode %{ 3778 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 
instruct Repl16S_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %}
  ins_encode %{
    // 256-bit vpxor is an AVX2 instruction; 256-bit short vectors imply AVX2.
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar to be vector
instruct Repl2I(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Integer could be loaded into xmm register directly from memory.
instruct Repl2I_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
instruct Repl2I_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar zero to be vector
instruct Repl2I_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate2I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate4I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8I_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %}
  ins_encode %{
    // 256-bit vpxor is an AVX2 instruction; 256-bit int vectors imply AVX2.
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate long (8 byte) scalar to be vector
#ifdef _LP64
instruct Repl2L(vecX dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
replicate2L"%} 3877 ins_encode %{ 3878 __ movdl($dst$$XMMRegister, $src$$Register); 3879 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3880 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3881 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3882 %} 3883 ins_pipe( pipe_slow ); 3884 %} 3885 #endif // _LP64 3886 3887 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. 3888 instruct Repl2L_imm(vecX dst, immL con) %{ 3889 predicate(n->as_Vector()->length() == 2); 3890 match(Set dst (ReplicateL con)); 3891 format %{ "movq $dst,[$constantaddress]\n\t" 3892 "punpcklqdq $dst,$dst\t! replicate2L($con)" %} 3893 ins_encode %{ 3894 __ movq($dst$$XMMRegister, $constantaddress($con)); 3895 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3896 %} 3897 ins_pipe( pipe_slow ); 3898 %} 3899 3900 // Replicate long (8 byte) scalar zero to be vector 3901 instruct Repl2L_zero(vecX dst, immL0 zero) %{ 3902 predicate(n->as_Vector()->length() == 2); 3903 match(Set dst (ReplicateL zero)); 3904 format %{ "pxor $dst,$dst\t! replicate2L zero" %} 3905 ins_encode %{ 3906 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3907 %} 3908 ins_pipe( fpu_reg_reg ); 3909 %} 3910 3911 instruct Repl4L_zero(vecY dst, immL0 zero) %{ 3912 predicate(n->as_Vector()->length() == 4); 3913 match(Set dst (ReplicateL zero)); 3914 format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %} 3915 ins_encode %{ 3916 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 3917 int vector_len = 1; 3918 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3919 %} 3920 ins_pipe( fpu_reg_reg ); 3921 %} 3922 3923 // Replicate float (4 byte) scalar to be vector 3924 instruct Repl2F(vecD dst, regF src) %{ 3925 predicate(n->as_Vector()->length() == 2); 3926 match(Set dst (ReplicateF src)); 3927 format %{ "pshufd $dst,$dst,0x00\t! replicate2F" %} 3928 ins_encode %{ 3929 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3930 %} 3931 ins_pipe( fpu_reg_reg ); 3932 %} 3933 3934 instruct Repl4F(vecX dst, regF src) %{ 3935 predicate(n->as_Vector()->length() == 4); 3936 match(Set dst (ReplicateF src)); 3937 format %{ "pshufd $dst,$dst,0x00\t! replicate4F" %} 3938 ins_encode %{ 3939 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3940 %} 3941 ins_pipe( pipe_slow ); 3942 %} 3943 3944 // Replicate double (8 bytes) scalar to be vector 3945 instruct Repl2D(vecX dst, regD src) %{ 3946 predicate(n->as_Vector()->length() == 2); 3947 match(Set dst (ReplicateD src)); 3948 format %{ "pshufd $dst,$src,0x44\t! replicate2D" %} 3949 ins_encode %{ 3950 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3951 %} 3952 ins_pipe( pipe_slow ); 3953 %} 3954 3955 // ====================EVEX REPLICATE============================================= 3956 3957 instruct Repl4B_mem_evex(vecS dst, memory mem) %{ 3958 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw()); 3959 match(Set dst (ReplicateB (LoadB mem))); 3960 format %{ "vpbroadcastb $dst,$mem\t! replicate4B" %} 3961 ins_encode %{ 3962 int vector_len = 0; 3963 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 3964 %} 3965 ins_pipe( pipe_slow ); 3966 %} 3967 3968 instruct Repl8B_mem_evex(vecD dst, memory mem) %{ 3969 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 3970 match(Set dst (ReplicateB (LoadB mem))); 3971 format %{ "vpbroadcastb $dst,$mem\t! 
replicate8B" %} 3972 ins_encode %{ 3973 int vector_len = 0; 3974 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 3975 %} 3976 ins_pipe( pipe_slow ); 3977 %} 3978 3979 instruct Repl16B_evex(vecX dst, rRegI src) %{ 3980 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 3981 match(Set dst (ReplicateB src)); 3982 format %{ "vpbroadcastb $dst,$src\t! replicate16B" %} 3983 ins_encode %{ 3984 int vector_len = 0; 3985 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 3986 %} 3987 ins_pipe( pipe_slow ); 3988 %} 3989 3990 instruct Repl16B_mem_evex(vecX dst, memory mem) %{ 3991 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 3992 match(Set dst (ReplicateB (LoadB mem))); 3993 format %{ "vpbroadcastb $dst,$mem\t! replicate16B" %} 3994 ins_encode %{ 3995 int vector_len = 0; 3996 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 3997 %} 3998 ins_pipe( pipe_slow ); 3999 %} 4000 4001 instruct Repl32B_evex(vecY dst, rRegI src) %{ 4002 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); 4003 match(Set dst (ReplicateB src)); 4004 format %{ "vpbroadcastb $dst,$src\t! replicate32B" %} 4005 ins_encode %{ 4006 int vector_len = 1; 4007 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 4008 %} 4009 ins_pipe( pipe_slow ); 4010 %} 4011 4012 instruct Repl32B_mem_evex(vecY dst, memory mem) %{ 4013 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); 4014 match(Set dst (ReplicateB (LoadB mem))); 4015 format %{ "vpbroadcastb $dst,$mem\t! replicate32B" %} 4016 ins_encode %{ 4017 int vector_len = 1; 4018 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4019 %} 4020 ins_pipe( pipe_slow ); 4021 %} 4022 4023 instruct Repl64B_evex(vecZ dst, rRegI src) %{ 4024 predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw()); 4025 match(Set dst (ReplicateB src)); 4026 format %{ "vpbroadcastb $dst,$src\t! upper replicate64B" %} 4027 ins_encode %{ 4028 int vector_len = 2; 4029 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 4030 %} 4031 ins_pipe( pipe_slow ); 4032 %} 4033 4034 instruct Repl64B_mem_evex(vecZ dst, memory mem) %{ 4035 predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw()); 4036 match(Set dst (ReplicateB (LoadB mem))); 4037 format %{ "vpbroadcastb $dst,$mem\t! replicate64B" %} 4038 ins_encode %{ 4039 int vector_len = 2; 4040 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4041 %} 4042 ins_pipe( pipe_slow ); 4043 %} 4044 4045 instruct Repl16B_imm_evex(vecX dst, immI con) %{ 4046 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4047 match(Set dst (ReplicateB con)); 4048 format %{ "movq $dst,[$constantaddress]\n\t" 4049 "vpbroadcastb $dst,$dst\t! replicate16B" %} 4050 ins_encode %{ 4051 int vector_len = 0; 4052 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 4053 __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4054 %} 4055 ins_pipe( pipe_slow ); 4056 %} 4057 4058 instruct Repl32B_imm_evex(vecY dst, immI con) %{ 4059 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); 4060 match(Set dst (ReplicateB con)); 4061 format %{ "movq $dst,[$constantaddress]\n\t" 4062 "vpbroadcastb $dst,$dst\t! 
replicate32B" %} 4063 ins_encode %{ 4064 int vector_len = 1; 4065 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 4066 __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4067 %} 4068 ins_pipe( pipe_slow ); 4069 %} 4070 4071 instruct Repl64B_imm_evex(vecZ dst, immI con) %{ 4072 predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw()); 4073 match(Set dst (ReplicateB con)); 4074 format %{ "movq $dst,[$constantaddress]\n\t" 4075 "vpbroadcastb $dst,$dst\t! upper replicate64B" %} 4076 ins_encode %{ 4077 int vector_len = 2; 4078 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 4079 __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4080 %} 4081 ins_pipe( pipe_slow ); 4082 %} 4083 4084 instruct Repl64B_zero_evex(vecZ dst, immI0 zero) %{ 4085 predicate(n->as_Vector()->length() == 64 && UseAVX > 2); 4086 match(Set dst (ReplicateB zero)); 4087 format %{ "vpxor $dst k0,$dst,$dst\t! replicate64B zero" %} 4088 ins_encode %{ 4089 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 4090 int vector_len = 2; 4091 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4092 %} 4093 ins_pipe( fpu_reg_reg ); 4094 %} 4095 4096 instruct Repl4S_evex(vecD dst, rRegI src) %{ 4097 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw()); 4098 match(Set dst (ReplicateS src)); 4099 format %{ "vpbroadcastw $dst,$src\t! replicate4S" %} 4100 ins_encode %{ 4101 int vector_len = 0; 4102 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4103 %} 4104 ins_pipe( pipe_slow ); 4105 %} 4106 4107 instruct Repl4S_mem_evex(vecD dst, memory mem) %{ 4108 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw()); 4109 match(Set dst (ReplicateS (LoadS mem))); 4110 format %{ "vpbroadcastw $dst,$mem\t! replicate4S" %} 4111 ins_encode %{ 4112 int vector_len = 0; 4113 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4114 %} 4115 ins_pipe( pipe_slow ); 4116 %} 4117 4118 instruct Repl8S_evex(vecX dst, rRegI src) %{ 4119 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 4120 match(Set dst (ReplicateS src)); 4121 format %{ "vpbroadcastw $dst,$src\t! replicate8S" %} 4122 ins_encode %{ 4123 int vector_len = 0; 4124 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4125 %} 4126 ins_pipe( pipe_slow ); 4127 %} 4128 4129 instruct Repl8S_mem_evex(vecX dst, memory mem) %{ 4130 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 4131 match(Set dst (ReplicateS (LoadS mem))); 4132 format %{ "vpbroadcastw $dst,$mem\t! replicate8S" %} 4133 ins_encode %{ 4134 int vector_len = 0; 4135 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4136 %} 4137 ins_pipe( pipe_slow ); 4138 %} 4139 4140 instruct Repl16S_evex(vecY dst, rRegI src) %{ 4141 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4142 match(Set dst (ReplicateS src)); 4143 format %{ "vpbroadcastw $dst,$src\t! replicate16S" %} 4144 ins_encode %{ 4145 int vector_len = 1; 4146 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4147 %} 4148 ins_pipe( pipe_slow ); 4149 %} 4150 4151 instruct Repl16S_mem_evex(vecY dst, memory mem) %{ 4152 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4153 match(Set dst (ReplicateS (LoadS mem))); 4154 format %{ "vpbroadcastw $dst,$mem\t! 
replicate16S" %} 4155 ins_encode %{ 4156 int vector_len = 1; 4157 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4158 %} 4159 ins_pipe( pipe_slow ); 4160 %} 4161 4162 instruct Repl32S_evex(vecZ dst, rRegI src) %{ 4163 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw()); 4164 match(Set dst (ReplicateS src)); 4165 format %{ "vpbroadcastw $dst,$src\t! replicate32S" %} 4166 ins_encode %{ 4167 int vector_len = 2; 4168 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4169 %} 4170 ins_pipe( pipe_slow ); 4171 %} 4172 4173 instruct Repl32S_mem_evex(vecZ dst, memory mem) %{ 4174 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw()); 4175 match(Set dst (ReplicateS (LoadS mem))); 4176 format %{ "vpbroadcastw $dst,$mem\t! replicate32S" %} 4177 ins_encode %{ 4178 int vector_len = 2; 4179 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4180 %} 4181 ins_pipe( pipe_slow ); 4182 %} 4183 4184 instruct Repl8S_imm_evex(vecX dst, immI con) %{ 4185 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 4186 match(Set dst (ReplicateS con)); 4187 format %{ "movq $dst,[$constantaddress]\n\t" 4188 "vpbroadcastw $dst,$dst\t! replicate8S" %} 4189 ins_encode %{ 4190 int vector_len = 0; 4191 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4192 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4193 %} 4194 ins_pipe( pipe_slow ); 4195 %} 4196 4197 instruct Repl16S_imm_evex(vecY dst, immI con) %{ 4198 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4199 match(Set dst (ReplicateS con)); 4200 format %{ "movq $dst,[$constantaddress]\n\t" 4201 "vpbroadcastw $dst,$dst\t! replicate16S" %} 4202 ins_encode %{ 4203 int vector_len = 1; 4204 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4205 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4206 %} 4207 ins_pipe( pipe_slow ); 4208 %} 4209 4210 instruct Repl32S_imm_evex(vecZ dst, immI con) %{ 4211 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw()); 4212 match(Set dst (ReplicateS con)); 4213 format %{ "movq $dst,[$constantaddress]\n\t" 4214 "vpbroadcastw $dst,$dst\t! replicate32S" %} 4215 ins_encode %{ 4216 int vector_len = 2; 4217 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4218 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4219 %} 4220 ins_pipe( pipe_slow ); 4221 %} 4222 4223 instruct Repl32S_zero_evex(vecZ dst, immI0 zero) %{ 4224 predicate(n->as_Vector()->length() == 32 && UseAVX > 2); 4225 match(Set dst (ReplicateS zero)); 4226 format %{ "vpxor $dst k0,$dst,$dst\t! replicate32S zero" %} 4227 ins_encode %{ 4228 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 4229 int vector_len = 2; 4230 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4231 %} 4232 ins_pipe( fpu_reg_reg ); 4233 %} 4234 4235 instruct Repl4I_evex(vecX dst, rRegI src) %{ 4236 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4237 match(Set dst (ReplicateI src)); 4238 format %{ "vpbroadcastd $dst,$src\t! 
replicate4I" %} 4239 ins_encode %{ 4240 int vector_len = 0; 4241 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4242 %} 4243 ins_pipe( pipe_slow ); 4244 %} 4245 4246 instruct Repl4I_mem_evex(vecX dst, memory mem) %{ 4247 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4248 match(Set dst (ReplicateI (LoadI mem))); 4249 format %{ "vpbroadcastd $dst,$mem\t! replicate4I" %} 4250 ins_encode %{ 4251 int vector_len = 0; 4252 __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4253 %} 4254 ins_pipe( pipe_slow ); 4255 %} 4256 4257 instruct Repl8I_evex(vecY dst, rRegI src) %{ 4258 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4259 match(Set dst (ReplicateI src)); 4260 format %{ "vpbroadcastd $dst,$src\t! replicate8I" %} 4261 ins_encode %{ 4262 int vector_len = 1; 4263 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4264 %} 4265 ins_pipe( pipe_slow ); 4266 %} 4267 4268 instruct Repl8I_mem_evex(vecY dst, memory mem) %{ 4269 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4270 match(Set dst (ReplicateI (LoadI mem))); 4271 format %{ "vpbroadcastd $dst,$mem\t! replicate8I" %} 4272 ins_encode %{ 4273 int vector_len = 1; 4274 __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4275 %} 4276 ins_pipe( pipe_slow ); 4277 %} 4278 4279 instruct Repl16I_evex(vecZ dst, rRegI src) %{ 4280 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4281 match(Set dst (ReplicateI src)); 4282 format %{ "vpbroadcastd $dst,$src\t! replicate16I" %} 4283 ins_encode %{ 4284 int vector_len = 2; 4285 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4286 %} 4287 ins_pipe( pipe_slow ); 4288 %} 4289 4290 instruct Repl16I_mem_evex(vecZ dst, memory mem) %{ 4291 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4292 match(Set dst (ReplicateI (LoadI mem))); 4293 format %{ "vpbroadcastd $dst,$mem\t! replicate16I" %} 4294 ins_encode %{ 4295 int vector_len = 2; 4296 __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4297 %} 4298 ins_pipe( pipe_slow ); 4299 %} 4300 4301 instruct Repl4I_imm_evex(vecX dst, immI con) %{ 4302 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4303 match(Set dst (ReplicateI con)); 4304 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 4305 "vpbroadcastd $dst,$dst\t! replicate4I" %} 4306 ins_encode %{ 4307 int vector_len = 0; 4308 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4309 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4310 %} 4311 ins_pipe( pipe_slow ); 4312 %} 4313 4314 instruct Repl8I_imm_evex(vecY dst, immI con) %{ 4315 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4316 match(Set dst (ReplicateI con)); 4317 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 4318 "vpbroadcastd $dst,$dst\t! replicate8I" %} 4319 ins_encode %{ 4320 int vector_len = 1; 4321 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4322 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4323 %} 4324 ins_pipe( pipe_slow ); 4325 %} 4326 4327 instruct Repl16I_imm_evex(vecZ dst, immI con) %{ 4328 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4329 match(Set dst (ReplicateI con)); 4330 format %{ "movq $dst,[$constantaddress]\t! replicate16I($con)\n\t" 4331 "vpbroadcastd $dst,$dst\t! 
replicate16I" %} 4332 ins_encode %{ 4333 int vector_len = 2; 4334 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4335 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4336 %} 4337 ins_pipe( pipe_slow ); 4338 %} 4339 4340 instruct Repl16I_zero_evex(vecZ dst, immI0 zero) %{ 4341 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4342 match(Set dst (ReplicateI zero)); 4343 format %{ "vpxor $dst k0,$dst,$dst\t! replicate16I zero" %} 4344 ins_encode %{ 4345 // Use vxorpd since AVX does not have vpxor for 512-bit (AVX2 will have it). 4346 int vector_len = 2; 4347 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4348 %} 4349 ins_pipe( fpu_reg_reg ); 4350 %} 4351 4352 // Replicate long (8 byte) scalar to be vector 4353 #ifdef _LP64 4354 instruct Repl4L_evex(vecY dst, rRegL src) %{ 4355 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4356 match(Set dst (ReplicateL src)); 4357 format %{ "vpbroadcastq $dst,$src\t! replicate4L" %} 4358 ins_encode %{ 4359 int vector_len = 1; 4360 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); 4361 %} 4362 ins_pipe( pipe_slow ); 4363 %} 4364 4365 instruct Repl8L_evex(vecZ dst, rRegL src) %{ 4366 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4367 match(Set dst (ReplicateL src)); 4368 format %{ "vpbroadcastq $dst,$src\t! replicate8L" %} 4369 ins_encode %{ 4370 int vector_len = 2; 4371 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); 4372 %} 4373 ins_pipe( pipe_slow ); 4374 %} 4375 #else // _LP64 4376 instruct Repl4L_evex(vecY dst, eRegL src, regD tmp) %{ 4377 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4378 match(Set dst (ReplicateL src)); 4379 effect(TEMP dst, USE src, TEMP tmp); 4380 format %{ "movdl $dst,$src.lo\n\t" 4381 "movdl $tmp,$src.hi\n\t" 4382 "punpckldq $dst,$tmp\n\t" 4383 "vpbroadcastq $dst,$dst\t! replicate4L" %} 4384 ins_encode %{ 4385 int vector_len = 1; 4386 __ movdl($dst$$XMMRegister, $src$$Register); 4387 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4388 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4389 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4390 %} 4391 ins_pipe( pipe_slow ); 4392 %} 4393 4394 instruct Repl8L_evex(vecZ dst, eRegL src, regD tmp) %{ 4395 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4396 match(Set dst (ReplicateL src)); 4397 effect(TEMP dst, USE src, TEMP tmp); 4398 format %{ "movdl $dst,$src.lo\n\t" 4399 "movdl $tmp,$src.hi\n\t" 4400 "punpckldq $dst,$tmp\n\t" 4401 "vpbroadcastq $dst,$dst\t! replicate8L" %} 4402 ins_encode %{ 4403 int vector_len = 2; 4404 __ movdl($dst$$XMMRegister, $src$$Register); 4405 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4406 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4407 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4408 %} 4409 ins_pipe( pipe_slow ); 4410 %} 4411 #endif // _LP64 4412 4413 instruct Repl4L_imm_evex(vecY dst, immL con) %{ 4414 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4415 match(Set dst (ReplicateL con)); 4416 format %{ "movq $dst,[$constantaddress]\n\t" 4417 "vpbroadcastq $dst,$dst\t! 
replicate4L" %} 4418 ins_encode %{ 4419 int vector_len = 1; 4420 __ movq($dst$$XMMRegister, $constantaddress($con)); 4421 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4422 %} 4423 ins_pipe( pipe_slow ); 4424 %} 4425 4426 instruct Repl8L_imm_evex(vecZ dst, immL con) %{ 4427 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4428 match(Set dst (ReplicateL con)); 4429 format %{ "movq $dst,[$constantaddress]\n\t" 4430 "vpbroadcastq $dst,$dst\t! replicate8L" %} 4431 ins_encode %{ 4432 int vector_len = 2; 4433 __ movq($dst$$XMMRegister, $constantaddress($con)); 4434 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4435 %} 4436 ins_pipe( pipe_slow ); 4437 %} 4438 4439 instruct Repl2L_mem_evex(vecX dst, memory mem) %{ 4440 predicate(n->as_Vector()->length() == 2 && VM_Version::supports_avx512vl()); 4441 match(Set dst (ReplicateL (LoadL mem))); 4442 format %{ "vpbroadcastd $dst,$mem\t! replicate2L" %} 4443 ins_encode %{ 4444 int vector_len = 0; 4445 __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4446 %} 4447 ins_pipe( pipe_slow ); 4448 %} 4449 4450 instruct Repl4L_mem_evex(vecY dst, memory mem) %{ 4451 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4452 match(Set dst (ReplicateL (LoadL mem))); 4453 format %{ "vpbroadcastd $dst,$mem\t! replicate4L" %} 4454 ins_encode %{ 4455 int vector_len = 1; 4456 __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4457 %} 4458 ins_pipe( pipe_slow ); 4459 %} 4460 4461 instruct Repl8L_mem_evex(vecZ dst, memory mem) %{ 4462 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4463 match(Set dst (ReplicateL (LoadL mem))); 4464 format %{ "vpbroadcastd $dst,$mem\t! replicate8L" %} 4465 ins_encode %{ 4466 int vector_len = 2; 4467 __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4468 %} 4469 ins_pipe( pipe_slow ); 4470 %} 4471 4472 instruct Repl8L_zero_evex(vecZ dst, immL0 zero) %{ 4473 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4474 match(Set dst (ReplicateL zero)); 4475 format %{ "vpxor $dst k0,$dst,$dst\t! replicate8L zero" %} 4476 ins_encode %{ 4477 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 4478 int vector_len = 2; 4479 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4480 %} 4481 ins_pipe( fpu_reg_reg ); 4482 %} 4483 4484 instruct Repl8F_evex(vecY dst, regF src) %{ 4485 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4486 match(Set dst (ReplicateF src)); 4487 format %{ "vbroadcastss $dst,$src\t! replicate8F" %} 4488 ins_encode %{ 4489 int vector_len = 1; 4490 __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4491 %} 4492 ins_pipe( pipe_slow ); 4493 %} 4494 4495 instruct Repl8F_mem_evex(vecY dst, memory mem) %{ 4496 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4497 match(Set dst (ReplicateF (LoadF mem))); 4498 format %{ "vbroadcastss $dst,$mem\t! replicate8F" %} 4499 ins_encode %{ 4500 int vector_len = 1; 4501 __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); 4502 %} 4503 ins_pipe( pipe_slow ); 4504 %} 4505 4506 instruct Repl16F_evex(vecZ dst, regF src) %{ 4507 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4508 match(Set dst (ReplicateF src)); 4509 format %{ "vbroadcastss $dst,$src\t! 
replicate16F" %} 4510 ins_encode %{ 4511 int vector_len = 2; 4512 __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4513 %} 4514 ins_pipe( pipe_slow ); 4515 %} 4516 4517 instruct Repl16F_mem_evex(vecZ dst, memory mem) %{ 4518 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4519 match(Set dst (ReplicateF (LoadF mem))); 4520 format %{ "vbroadcastss $dst,$mem\t! replicate16F" %} 4521 ins_encode %{ 4522 int vector_len = 2; 4523 __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); 4524 %} 4525 ins_pipe( pipe_slow ); 4526 %} 4527 4528 instruct Repl2F_zero_evex(vecD dst, immF0 zero) %{ 4529 predicate(n->as_Vector()->length() == 2 && UseAVX > 2); 4530 match(Set dst (ReplicateF zero)); 4531 format %{ "vpxor $dst k0,$dst,$dst\t! replicate2F zero" %} 4532 ins_encode %{ 4533 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4534 int vector_len = 2; 4535 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4536 %} 4537 ins_pipe( fpu_reg_reg ); 4538 %} 4539 4540 instruct Repl4F_zero_evex(vecX dst, immF0 zero) %{ 4541 predicate(n->as_Vector()->length() == 4 && UseAVX > 2); 4542 match(Set dst (ReplicateF zero)); 4543 format %{ "vpxor $dst k0,$dst,$dst\t! replicate4F zero" %} 4544 ins_encode %{ 4545 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4546 int vector_len = 2; 4547 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4548 %} 4549 ins_pipe( fpu_reg_reg ); 4550 %} 4551 4552 instruct Repl8F_zero_evex(vecY dst, immF0 zero) %{ 4553 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4554 match(Set dst (ReplicateF zero)); 4555 format %{ "vpxor $dst k0,$dst,$dst\t! replicate8F zero" %} 4556 ins_encode %{ 4557 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4558 int vector_len = 2; 4559 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4560 %} 4561 ins_pipe( fpu_reg_reg ); 4562 %} 4563 4564 instruct Repl16F_zero_evex(vecZ dst, immF0 zero) %{ 4565 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4566 match(Set dst (ReplicateF zero)); 4567 format %{ "vpxor $dst k0,$dst,$dst\t! replicate16F zero" %} 4568 ins_encode %{ 4569 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4570 int vector_len = 2; 4571 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4572 %} 4573 ins_pipe( fpu_reg_reg ); 4574 %} 4575 4576 instruct Repl4D_evex(vecY dst, regD src) %{ 4577 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4578 match(Set dst (ReplicateD src)); 4579 format %{ "vbroadcastsd $dst,$src\t! replicate4D" %} 4580 ins_encode %{ 4581 int vector_len = 1; 4582 __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4583 %} 4584 ins_pipe( pipe_slow ); 4585 %} 4586 4587 instruct Repl4D_mem_evex(vecY dst, memory mem) %{ 4588 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4589 match(Set dst (ReplicateD (LoadD mem))); 4590 format %{ "vbroadcastsd $dst,$mem\t! 
replicate4D" %} 4591 ins_encode %{ 4592 int vector_len = 1; 4593 __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); 4594 %} 4595 ins_pipe( pipe_slow ); 4596 %} 4597 4598 instruct Repl8D_evex(vecZ dst, regD src) %{ 4599 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4600 match(Set dst (ReplicateD src)); 4601 format %{ "vbroadcastsd $dst,$src\t! replicate8D" %} 4602 ins_encode %{ 4603 int vector_len = 2; 4604 __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4605 %} 4606 ins_pipe( pipe_slow ); 4607 %} 4608 4609 instruct Repl8D_mem_evex(vecZ dst, memory mem) %{ 4610 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4611 match(Set dst (ReplicateD (LoadD mem))); 4612 format %{ "vbroadcastsd $dst,$mem\t! replicate8D" %} 4613 ins_encode %{ 4614 int vector_len = 2; 4615 __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); 4616 %} 4617 ins_pipe( pipe_slow ); 4618 %} 4619 4620 instruct Repl2D_zero_evex(vecX dst, immD0 zero) %{ 4621 predicate(n->as_Vector()->length() == 2 && UseAVX > 2); 4622 match(Set dst (ReplicateD zero)); 4623 format %{ "vpxor $dst k0,$dst,$dst\t! replicate2D zero" %} 4624 ins_encode %{ 4625 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4626 int vector_len = 2; 4627 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4628 %} 4629 ins_pipe( fpu_reg_reg ); 4630 %} 4631 4632 instruct Repl4D_zero_evex(vecY dst, immD0 zero) %{ 4633 predicate(n->as_Vector()->length() == 4 && UseAVX > 2); 4634 match(Set dst (ReplicateD zero)); 4635 format %{ "vpxor $dst k0,$dst,$dst\t! replicate4D zero" %} 4636 ins_encode %{ 4637 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4638 int vector_len = 2; 4639 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4640 %} 4641 ins_pipe( fpu_reg_reg ); 4642 %} 4643 4644 instruct Repl8D_zero_evex(vecZ dst, immD0 zero) %{ 4645 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4646 match(Set dst (ReplicateD zero)); 4647 format %{ "vpxor $dst k0,$dst,$dst,vect512\t! replicate8D zero" %} 4648 ins_encode %{ 4649 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4650 int vector_len = 2; 4651 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4652 %} 4653 ins_pipe( fpu_reg_reg ); 4654 %} 4655 4656 // ====================REDUCTION ARITHMETIC======================================= 4657 4658 instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4659 predicate(UseSSE > 2 && UseAVX == 0); 4660 match(Set dst (AddReductionVI src1 src2)); 4661 effect(TEMP tmp2, TEMP tmp); 4662 format %{ "movdqu $tmp2,$src2\n\t" 4663 "phaddd $tmp2,$tmp2\n\t" 4664 "movd $tmp,$src1\n\t" 4665 "paddd $tmp,$tmp2\n\t" 4666 "movd $dst,$tmp\t! 
add reduction2I" %} 4667 ins_encode %{ 4668 __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister); 4669 __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); 4670 __ movdl($tmp$$XMMRegister, $src1$$Register); 4671 __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister); 4672 __ movdl($dst$$Register, $tmp$$XMMRegister); 4673 %} 4674 ins_pipe( pipe_slow ); 4675 %} 4676 4677 instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4678 predicate(VM_Version::supports_avx256only()); 4679 match(Set dst (AddReductionVI src1 src2)); 4680 effect(TEMP tmp, TEMP tmp2); 4681 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4682 "movd $tmp2,$src1\n\t" 4683 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4684 "movd $dst,$tmp2\t! add reduction2I" %} 4685 ins_encode %{ 4686 int vector_len = 0; 4687 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4688 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4689 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 4690 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4691 %} 4692 ins_pipe( pipe_slow ); 4693 %} 4694 4695 instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4696 predicate(UseAVX > 2); 4697 match(Set dst (AddReductionVI src1 src2)); 4698 effect(TEMP tmp, TEMP tmp2); 4699 format %{ "pshufd $tmp2,$src2,0x1\n\t" 4700 "vpaddd $tmp,$src2,$tmp2\n\t" 4701 "movd $tmp2,$src1\n\t" 4702 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4703 "movd $dst,$tmp2\t! add reduction2I" %} 4704 ins_encode %{ 4705 int vector_len = 0; 4706 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 4707 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4708 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4709 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4710 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4711 %} 4712 ins_pipe( pipe_slow ); 4713 %} 4714 4715 instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4716 predicate(UseSSE > 2 && UseAVX == 0); 4717 match(Set dst (AddReductionVI src1 src2)); 4718 effect(TEMP tmp2, TEMP tmp); 4719 format %{ "movdqu $tmp2,$src2\n\t" 4720 "phaddd $tmp2,$tmp2\n\t" 4721 "phaddd $tmp2,$tmp2\n\t" 4722 "movd $tmp,$src1\n\t" 4723 "paddd $tmp,$tmp2\n\t" 4724 "movd $dst,$tmp\t! add reduction4I" %} 4725 ins_encode %{ 4726 __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister); 4727 __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); 4728 __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); 4729 __ movdl($tmp$$XMMRegister, $src1$$Register); 4730 __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister); 4731 __ movdl($dst$$Register, $tmp$$XMMRegister); 4732 %} 4733 ins_pipe( pipe_slow ); 4734 %} 4735 4736 instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4737 predicate(VM_Version::supports_avx256only()); 4738 match(Set dst (AddReductionVI src1 src2)); 4739 effect(TEMP tmp, TEMP tmp2); 4740 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4741 "vphaddd $tmp,$tmp,$tmp2\n\t" 4742 "movd $tmp2,$src1\n\t" 4743 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4744 "movd $dst,$tmp2\t! 
add reduction4I" %} 4745 ins_encode %{ 4746 int vector_len = 0; 4747 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4748 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4749 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4750 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 4751 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4752 %} 4753 ins_pipe( pipe_slow ); 4754 %} 4755 4756 instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4757 predicate(UseAVX > 2); 4758 match(Set dst (AddReductionVI src1 src2)); 4759 effect(TEMP tmp, TEMP tmp2); 4760 format %{ "pshufd $tmp2,$src2,0xE\n\t" 4761 "vpaddd $tmp,$src2,$tmp2\n\t" 4762 "pshufd $tmp2,$tmp,0x1\n\t" 4763 "vpaddd $tmp,$tmp,$tmp2\n\t" 4764 "movd $tmp2,$src1\n\t" 4765 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4766 "movd $dst,$tmp2\t! add reduction4I" %} 4767 ins_encode %{ 4768 int vector_len = 0; 4769 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 4770 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4771 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4772 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4773 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4774 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4775 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4776 %} 4777 ins_pipe( pipe_slow ); 4778 %} 4779 4780 instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 4781 predicate(VM_Version::supports_avx256only()); 4782 match(Set dst (AddReductionVI src1 src2)); 4783 effect(TEMP tmp, TEMP tmp2); 4784 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4785 "vphaddd $tmp,$tmp,$tmp2\n\t" 4786 "vextracti128 $tmp2,$tmp\n\t" 4787 "vpaddd $tmp,$tmp,$tmp2\n\t" 4788 "movd $tmp2,$src1\n\t" 4789 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4790 "movd $dst,$tmp2\t! add reduction8I" %} 4791 ins_encode %{ 4792 int vector_len = 1; 4793 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4794 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4795 __ vextracti128h($tmp2$$XMMRegister, $tmp$$XMMRegister); 4796 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4797 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4798 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4799 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4800 %} 4801 ins_pipe( pipe_slow ); 4802 %} 4803 4804 instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 4805 predicate(UseAVX > 2); 4806 match(Set dst (AddReductionVI src1 src2)); 4807 effect(TEMP tmp, TEMP tmp2); 4808 format %{ "vextracti128 $tmp,$src2\n\t" 4809 "vpaddd $tmp,$tmp,$src2\n\t" 4810 "pshufd $tmp2,$tmp,0xE\n\t" 4811 "vpaddd $tmp,$tmp,$tmp2\n\t" 4812 "pshufd $tmp2,$tmp,0x1\n\t" 4813 "vpaddd $tmp,$tmp,$tmp2\n\t" 4814 "movd $tmp2,$src1\n\t" 4815 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4816 "movd $dst,$tmp2\t! 
add reduction8I" %} 4817 ins_encode %{ 4818 int vector_len = 0; 4819 __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister); 4820 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 4821 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 4822 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4823 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4824 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4825 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4826 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4827 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4828 %} 4829 ins_pipe( pipe_slow ); 4830 %} 4831 4832 instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 4833 predicate(UseAVX > 2); 4834 match(Set dst (AddReductionVI src1 src2)); 4835 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 4836 format %{ "vextracti64x4 $tmp3,$src2,0x1\n\t" 4837 "vpaddd $tmp3,$tmp3,$src2\n\t" 4838 "vextracti128 $tmp,$tmp3\n\t" 4839 "vpaddd $tmp,$tmp,$tmp3\n\t" 4840 "pshufd $tmp2,$tmp,0xE\n\t" 4841 "vpaddd $tmp,$tmp,$tmp2\n\t" 4842 "pshufd $tmp2,$tmp,0x1\n\t" 4843 "vpaddd $tmp,$tmp,$tmp2\n\t" 4844 "movd $tmp2,$src1\n\t" 4845 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4846 "movd $dst,$tmp2\t! mul reduction16I" %} 4847 ins_encode %{ 4848 __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 1); 4849 __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 4850 __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister); 4851 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 4852 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 4853 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4854 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4855 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4856 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4857 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4858 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4859 %} 4860 ins_pipe( pipe_slow ); 4861 %} 4862 4863 #ifdef _LP64 4864 instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ 4865 predicate(UseAVX > 2); 4866 match(Set dst (AddReductionVL src1 src2)); 4867 effect(TEMP tmp, TEMP tmp2); 4868 format %{ "pshufd $tmp2,$src2,0xE\n\t" 4869 "vpaddq $tmp,$src2,$tmp2\n\t" 4870 "movdq $tmp2,$src1\n\t" 4871 "vpaddq $tmp2,$tmp,$tmp2\n\t" 4872 "movdq $dst,$tmp2\t! add reduction2L" %} 4873 ins_encode %{ 4874 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 4875 __ vpaddq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 4876 __ movdq($tmp2$$XMMRegister, $src1$$Register); 4877 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4878 __ movdq($dst$$Register, $tmp2$$XMMRegister); 4879 %} 4880 ins_pipe( pipe_slow ); 4881 %} 4882 4883 instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ 4884 predicate(UseAVX > 2); 4885 match(Set dst (AddReductionVL src1 src2)); 4886 effect(TEMP tmp, TEMP tmp2); 4887 format %{ "vextracti128 $tmp,$src2\n\t" 4888 "vpaddq $tmp2,$tmp,$src2\n\t" 4889 "pshufd $tmp,$tmp2,0xE\n\t" 4890 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4891 "movdq $tmp,$src1\n\t" 4892 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4893 "movdq $dst,$tmp2\t! 
add reduction4L" %} 4894 ins_encode %{ 4895 __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister); 4896 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 4897 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 4898 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4899 __ movdq($tmp$$XMMRegister, $src1$$Register); 4900 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4901 __ movdq($dst$$Register, $tmp2$$XMMRegister); 4902 %} 4903 ins_pipe( pipe_slow ); 4904 %} 4905 4906 instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ 4907 predicate(UseAVX > 2); 4908 match(Set dst (AddReductionVL src1 src2)); 4909 effect(TEMP tmp, TEMP tmp2); 4910 format %{ "vextracti64x4 $tmp2,$src2,0x1\n\t" 4911 "vpaddq $tmp2,$tmp2,$src2\n\t" 4912 "vextracti128 $tmp,$tmp2\n\t" 4913 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4914 "pshufd $tmp,$tmp2,0xE\n\t" 4915 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4916 "movdq $tmp,$src1\n\t" 4917 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4918 "movdq $dst,$tmp2\t! add reduction8L" %} 4919 ins_encode %{ 4920 __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 1); 4921 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 4922 __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister); 4923 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4924 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 4925 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4926 __ movdq($tmp$$XMMRegister, $src1$$Register); 4927 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4928 __ movdq($dst$$Register, $tmp2$$XMMRegister); 4929 %} 4930 ins_pipe( pipe_slow ); 4931 %} 4932 #endif 4933 4934 instruct rsadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 4935 predicate(UseSSE >= 1 && UseAVX == 0); 4936 match(Set dst (AddReductionVF dst src2)); 4937 effect(TEMP dst, TEMP tmp); 4938 format %{ "addss $dst,$src2\n\t" 4939 "pshufd $tmp,$src2,0x01\n\t" 4940 "addss $dst,$tmp\t! add reduction2F" %} 4941 ins_encode %{ 4942 __ addss($dst$$XMMRegister, $src2$$XMMRegister); 4943 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4944 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 4945 %} 4946 ins_pipe( pipe_slow ); 4947 %} 4948 4949 instruct rvadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 4950 predicate(UseAVX > 0); 4951 match(Set dst (AddReductionVF dst src2)); 4952 effect(TEMP dst, TEMP tmp); 4953 format %{ "vaddss $dst,$dst,$src2\n\t" 4954 "pshufd $tmp,$src2,0x01\n\t" 4955 "vaddss $dst,$dst,$tmp\t! add reduction2F" %} 4956 ins_encode %{ 4957 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 4958 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4959 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4960 %} 4961 ins_pipe( pipe_slow ); 4962 %} 4963 4964 instruct rsadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 4965 predicate(UseSSE >= 1 && UseAVX == 0); 4966 match(Set dst (AddReductionVF dst src2)); 4967 effect(TEMP dst, TEMP tmp); 4968 format %{ "addss $dst,$src2\n\t" 4969 "pshufd $tmp,$src2,0x01\n\t" 4970 "addss $dst,$tmp\n\t" 4971 "pshufd $tmp,$src2,0x02\n\t" 4972 "addss $dst,$tmp\n\t" 4973 "pshufd $tmp,$src2,0x03\n\t" 4974 "addss $dst,$tmp\t! 
add reduction4F" %} 4975 ins_encode %{ 4976 __ addss($dst$$XMMRegister, $src2$$XMMRegister); 4977 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4978 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 4979 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 4980 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 4981 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 4982 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 4983 %} 4984 ins_pipe( pipe_slow ); 4985 %} 4986 4987 instruct rvadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 4988 predicate(UseAVX > 0); 4989 match(Set dst (AddReductionVF dst src2)); 4990 effect(TEMP tmp, TEMP dst); 4991 format %{ "vaddss $dst,dst,$src2\n\t" 4992 "pshufd $tmp,$src2,0x01\n\t" 4993 "vaddss $dst,$dst,$tmp\n\t" 4994 "pshufd $tmp,$src2,0x02\n\t" 4995 "vaddss $dst,$dst,$tmp\n\t" 4996 "pshufd $tmp,$src2,0x03\n\t" 4997 "vaddss $dst,$dst,$tmp\t! add reduction4F" %} 4998 ins_encode %{ 4999 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5000 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5001 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5002 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5003 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5004 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5005 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5006 %} 5007 ins_pipe( pipe_slow ); 5008 %} 5009 5010 instruct radd8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{ 5011 predicate(UseAVX > 0); 5012 match(Set dst (AddReductionVF dst src2)); 5013 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5014 format %{ "vaddss $dst,$dst,$src2\n\t" 5015 "pshufd $tmp,$src2,0x01\n\t" 5016 "vaddss $dst,$dst,$tmp\n\t" 5017 "pshufd $tmp,$src2,0x02\n\t" 5018 "vaddss $dst,$dst,$tmp\n\t" 5019 "pshufd $tmp,$src2,0x03\n\t" 5020 "vaddss $dst,$dst,$tmp\n\t" 5021 "vextractf128 $tmp2,$src2\n\t" 5022 "vaddss $dst,$dst,$tmp2\n\t" 5023 "pshufd $tmp,$tmp2,0x01\n\t" 5024 "vaddss $dst,$dst,$tmp\n\t" 5025 "pshufd $tmp,$tmp2,0x02\n\t" 5026 "vaddss $dst,$dst,$tmp\n\t" 5027 "pshufd $tmp,$tmp2,0x03\n\t" 5028 "vaddss $dst,$dst,$tmp\t! 
add reduction8F" %} 5029 ins_encode %{ 5030 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5031 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5032 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5033 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5034 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5035 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5036 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5037 __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister); 5038 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5039 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5040 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5041 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5042 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5043 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5044 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5045 %} 5046 ins_pipe( pipe_slow ); 5047 %} 5048 5049 instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{ 5050 predicate(UseAVX > 2); 5051 match(Set dst (AddReductionVF dst src2)); 5052 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5053 format %{ "vaddss $dst,$dst,$src2\n\t" 5054 "pshufd $tmp,$src2,0x01\n\t" 5055 "vaddss $dst,$dst,$tmp\n\t" 5056 "pshufd $tmp,$src2,0x02\n\t" 5057 "vaddss $dst,$dst,$tmp\n\t" 5058 "pshufd $tmp,$src2,0x03\n\t" 5059 "vaddss $dst,$dst,$tmp\n\t" 5060 "vextractf32x4 $tmp2,$src2, 0x1\n\t" 5061 "vaddss $dst,$dst,$tmp2\n\t" 5062 "pshufd $tmp,$tmp2,0x01\n\t" 5063 "vaddss $dst,$dst,$tmp\n\t" 5064 "pshufd $tmp,$tmp2,0x02\n\t" 5065 "vaddss $dst,$dst,$tmp\n\t" 5066 "pshufd $tmp,$tmp2,0x03\n\t" 5067 "vaddss $dst,$dst,$tmp\n\t" 5068 "vextractf32x4 $tmp2,$src2, 0x2\n\t" 5069 "vaddss $dst,$dst,$tmp2\n\t" 5070 "pshufd $tmp,$tmp2,0x01\n\t" 5071 "vaddss $dst,$dst,$tmp\n\t" 5072 "pshufd $tmp,$tmp2,0x02\n\t" 5073 "vaddss $dst,$dst,$tmp\n\t" 5074 "pshufd $tmp,$tmp2,0x03\n\t" 5075 "vaddss $dst,$dst,$tmp\n\t" 5076 "vextractf32x4 $tmp2,$src2, 0x3\n\t" 5077 "vaddss $dst,$dst,$tmp2\n\t" 5078 "pshufd $tmp,$tmp2,0x01\n\t" 5079 "vaddss $dst,$dst,$tmp\n\t" 5080 "pshufd $tmp,$tmp2,0x02\n\t" 5081 "vaddss $dst,$dst,$tmp\n\t" 5082 "pshufd $tmp,$tmp2,0x03\n\t" 5083 "vaddss $dst,$dst,$tmp\t! 
add reduction16F" %} 5084 ins_encode %{ 5085 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5086 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5087 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5088 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5089 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5090 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5091 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5092 __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5093 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5094 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5095 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5096 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5097 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5098 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5099 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5100 __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 5101 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5102 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5103 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5104 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5105 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5106 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5107 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5108 __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 5109 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5110 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5111 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5112 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5113 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5114 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5115 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5116 %} 5117 ins_pipe( pipe_slow ); 5118 %} 5119 5120 instruct rsadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 5121 predicate(UseSSE >= 1 && UseAVX == 0); 5122 match(Set dst (AddReductionVD dst src2)); 5123 effect(TEMP tmp, TEMP dst); 5124 format %{ "addsd $dst,$src2\n\t" 5125 "pshufd $tmp,$src2,0xE\n\t" 5126 "addsd $dst,$tmp\t! add reduction2D" %} 5127 ins_encode %{ 5128 __ addsd($dst$$XMMRegister, $src2$$XMMRegister); 5129 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5130 __ addsd($dst$$XMMRegister, $tmp$$XMMRegister); 5131 %} 5132 ins_pipe( pipe_slow ); 5133 %} 5134 5135 instruct rvadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 5136 predicate(UseAVX > 0); 5137 match(Set dst (AddReductionVD dst src2)); 5138 effect(TEMP tmp, TEMP dst); 5139 format %{ "vaddsd $dst,$dst,$src2\n\t" 5140 "pshufd $tmp,$src2,0xE\n\t" 5141 "vaddsd $dst,$dst,$tmp\t! 
add reduction2D" %} 5142 ins_encode %{ 5143 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5144 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5145 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5146 %} 5147 ins_pipe( pipe_slow ); 5148 %} 5149 5150 instruct rvadd4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{ 5151 predicate(UseAVX > 0); 5152 match(Set dst (AddReductionVD dst src2)); 5153 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5154 format %{ "vaddsd $dst,$dst,$src2\n\t" 5155 "pshufd $tmp,$src2,0xE\n\t" 5156 "vaddsd $dst,$dst,$tmp\n\t" 5157 "vextractf32x4h $tmp2,$src2, 0x1\n\t" 5158 "vaddsd $dst,$dst,$tmp2\n\t" 5159 "pshufd $tmp,$tmp2,0xE\n\t" 5160 "vaddsd $dst,$dst,$tmp\t! add reduction4D" %} 5161 ins_encode %{ 5162 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5163 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5164 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5165 __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5166 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5167 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5168 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5169 %} 5170 ins_pipe( pipe_slow ); 5171 %} 5172 5173 instruct rvadd8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{ 5174 predicate(UseAVX > 2); 5175 match(Set dst (AddReductionVD dst src2)); 5176 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5177 format %{ "vaddsd $dst,$dst,$src2\n\t" 5178 "pshufd $tmp,$src2,0xE\n\t" 5179 "vaddsd $dst,$dst,$tmp\n\t" 5180 "vextractf32x4 $tmp2,$src2, 0x1\n\t" 5181 "vaddsd $dst,$dst,$tmp2\n\t" 5182 "pshufd $tmp,$tmp2,0xE\n\t" 5183 "vaddsd $dst,$dst,$tmp\n\t" 5184 "vextractf32x4 $tmp2,$src2, 0x2\n\t" 5185 "vaddsd $dst,$dst,$tmp2\n\t" 5186 "pshufd $tmp,$tmp2,0xE\n\t" 5187 "vaddsd $dst,$dst,$tmp\n\t" 5188 "vextractf32x4 $tmp2,$src2, 0x3\n\t" 5189 "vaddsd $dst,$dst,$tmp2\n\t" 5190 "pshufd $tmp,$tmp2,0xE\n\t" 5191 "vaddsd $dst,$dst,$tmp\t! add reduction8D" %} 5192 ins_encode %{ 5193 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5194 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5195 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5196 __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5197 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5198 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5199 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5200 __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 5201 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5202 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5203 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5204 __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 5205 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5206 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5207 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5208 %} 5209 ins_pipe( pipe_slow ); 5210 %} 5211 5212 instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 5213 predicate(UseSSE > 3 && UseAVX == 0); 5214 match(Set dst (MulReductionVI src1 src2)); 5215 effect(TEMP tmp, TEMP tmp2); 5216 format %{ "pshufd $tmp2,$src2,0x1\n\t" 5217 "pmulld $tmp2,$src2\n\t" 5218 "movd $tmp,$src1\n\t" 5219 "pmulld $tmp2,$tmp\n\t" 5220 "movd $dst,$tmp2\t! 
mul reduction2I" %} 5221 ins_encode %{ 5222 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5223 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 5224 __ movdl($tmp$$XMMRegister, $src1$$Register); 5225 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5226 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5227 %} 5228 ins_pipe( pipe_slow ); 5229 %} 5230 5231 instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 5232 predicate(UseAVX > 0); 5233 match(Set dst (MulReductionVI src1 src2)); 5234 effect(TEMP tmp, TEMP tmp2); 5235 format %{ "pshufd $tmp2,$src2,0x1\n\t" 5236 "vpmulld $tmp,$src2,$tmp2\n\t" 5237 "movd $tmp2,$src1\n\t" 5238 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5239 "movd $dst,$tmp2\t! mul reduction2I" %} 5240 ins_encode %{ 5241 int vector_len = 0; 5242 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5243 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5244 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5245 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5246 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5247 %} 5248 ins_pipe( pipe_slow ); 5249 %} 5250 5251 instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 5252 predicate(UseSSE > 3 && UseAVX == 0); 5253 match(Set dst (MulReductionVI src1 src2)); 5254 effect(TEMP tmp, TEMP tmp2); 5255 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5256 "pmulld $tmp2,$src2\n\t" 5257 "pshufd $tmp,$tmp2,0x1\n\t" 5258 "pmulld $tmp2,$tmp\n\t" 5259 "movd $tmp,$src1\n\t" 5260 "pmulld $tmp2,$tmp\n\t" 5261 "movd $dst,$tmp2\t! mul reduction4I" %} 5262 ins_encode %{ 5263 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5264 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 5265 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1); 5266 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5267 __ movdl($tmp$$XMMRegister, $src1$$Register); 5268 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5269 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5270 %} 5271 ins_pipe( pipe_slow ); 5272 %} 5273 5274 instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 5275 predicate(UseAVX > 0); 5276 match(Set dst (MulReductionVI src1 src2)); 5277 effect(TEMP tmp, TEMP tmp2); 5278 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5279 "vpmulld $tmp,$src2,$tmp2\n\t" 5280 "pshufd $tmp2,$tmp,0x1\n\t" 5281 "vpmulld $tmp,$tmp,$tmp2\n\t" 5282 "movd $tmp2,$src1\n\t" 5283 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5284 "movd $dst,$tmp2\t! 
mul reduction4I" %} 5285 ins_encode %{ 5286 int vector_len = 0; 5287 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5288 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5289 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5290 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5291 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5292 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5293 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5294 %} 5295 ins_pipe( pipe_slow ); 5296 %} 5297 5298 instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 5299 predicate(UseAVX > 0); 5300 match(Set dst (MulReductionVI src1 src2)); 5301 effect(TEMP tmp, TEMP tmp2); 5302 format %{ "vextracti128 $tmp,$src2\n\t" 5303 "vpmulld $tmp,$tmp,$src2\n\t" 5304 "pshufd $tmp2,$tmp,0xE\n\t" 5305 "vpmulld $tmp,$tmp,$tmp2\n\t" 5306 "pshufd $tmp2,$tmp,0x1\n\t" 5307 "vpmulld $tmp,$tmp,$tmp2\n\t" 5308 "movd $tmp2,$src1\n\t" 5309 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5310 "movd $dst,$tmp2\t! mul reduction8I" %} 5311 ins_encode %{ 5312 int vector_len = 0; 5313 __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister); 5314 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 5315 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5316 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5317 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5318 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5319 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5320 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5321 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5322 %} 5323 ins_pipe( pipe_slow ); 5324 %} 5325 5326 instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 5327 predicate(UseAVX > 2); 5328 match(Set dst (MulReductionVI src1 src2)); 5329 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5330 format %{ "vextracti64x4 $tmp3,$src2,0x1\n\t" 5331 "vpmulld $tmp3,$tmp3,$src2\n\t" 5332 "vextracti128 $tmp,$tmp3\n\t" 5333 "vpmulld $tmp,$tmp,$src2\n\t" 5334 "pshufd $tmp2,$tmp,0xE\n\t" 5335 "vpmulld $tmp,$tmp,$tmp2\n\t" 5336 "pshufd $tmp2,$tmp,0x1\n\t" 5337 "vpmulld $tmp,$tmp,$tmp2\n\t" 5338 "movd $tmp2,$src1\n\t" 5339 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5340 "movd $dst,$tmp2\t! 
mul reduction16I" %} 5341 ins_encode %{ 5342 __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 1); 5343 __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 5344 __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister); 5345 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 5346 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5347 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5348 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5349 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5350 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5351 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5352 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5353 %} 5354 ins_pipe( pipe_slow ); 5355 %} 5356 5357 #ifdef _LP64 5358 instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ 5359 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5360 match(Set dst (MulReductionVL src1 src2)); 5361 effect(TEMP tmp, TEMP tmp2); 5362 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5363 "vpmullq $tmp,$src2,$tmp2\n\t" 5364 "movdq $tmp2,$src1\n\t" 5365 "vpmullq $tmp2,$tmp,$tmp2\n\t" 5366 "movdq $dst,$tmp2\t! mul reduction2L" %} 5367 ins_encode %{ 5368 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5369 __ vpmullq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 5370 __ movdq($tmp2$$XMMRegister, $src1$$Register); 5371 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5372 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5373 %} 5374 ins_pipe( pipe_slow ); 5375 %} 5376 5377 instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ 5378 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5379 match(Set dst (MulReductionVL src1 src2)); 5380 effect(TEMP tmp, TEMP tmp2); 5381 format %{ "vextracti128 $tmp,$src2\n\t" 5382 "vpmullq $tmp2,$tmp,$src2\n\t" 5383 "pshufd $tmp,$tmp2,0xE\n\t" 5384 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5385 "movdq $tmp,$src1\n\t" 5386 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5387 "movdq $dst,$tmp2\t! mul reduction4L" %} 5388 ins_encode %{ 5389 __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister); 5390 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 5391 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5392 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5393 __ movdq($tmp$$XMMRegister, $src1$$Register); 5394 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5395 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5396 %} 5397 ins_pipe( pipe_slow ); 5398 %} 5399 5400 instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ 5401 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5402 match(Set dst (MulReductionVL src1 src2)); 5403 effect(TEMP tmp, TEMP tmp2); 5404 format %{ "vextracti64x4 $tmp2,$src2,0x1\n\t" 5405 "vpmullq $tmp2,$tmp2,$src2\n\t" 5406 "vextracti128 $tmp,$tmp2\n\t" 5407 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5408 "pshufd $tmp,$tmp2,0xE\n\t" 5409 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5410 "movdq $tmp,$src1\n\t" 5411 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5412 "movdq $dst,$tmp2\t! 
mul reduction8L" %} 5413 ins_encode %{ 5414 __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 1); 5415 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 5416 __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister); 5417 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5418 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5419 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5420 __ movdq($tmp$$XMMRegister, $src1$$Register); 5421 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5422 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5423 %} 5424 ins_pipe( pipe_slow ); 5425 %} 5426 #endif 5427 5428 instruct rsmul2F_reduction(regF dst, vecD src2, regF tmp) %{ 5429 predicate(UseSSE >= 1 && UseAVX == 0); 5430 match(Set dst (MulReductionVF dst src2)); 5431 effect(TEMP dst, TEMP tmp); 5432 format %{ "mulss $dst,$src2\n\t" 5433 "pshufd $tmp,$src2,0x01\n\t" 5434 "mulss $dst,$tmp\t! mul reduction2F" %} 5435 ins_encode %{ 5436 __ mulss($dst$$XMMRegister, $src2$$XMMRegister); 5437 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5438 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5439 %} 5440 ins_pipe( pipe_slow ); 5441 %} 5442 5443 instruct rvmul2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 5444 predicate(UseAVX > 0); 5445 match(Set dst (MulReductionVF dst src2)); 5446 effect(TEMP tmp, TEMP dst); 5447 format %{ "vmulss $dst,$dst,$src2\n\t" 5448 "pshufd $tmp,$src2,0x01\n\t" 5449 "vmulss $dst,$dst,$tmp\t! mul reduction2F" %} 5450 ins_encode %{ 5451 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5452 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5453 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5454 %} 5455 ins_pipe( pipe_slow ); 5456 %} 5457 5458 instruct rsmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 5459 predicate(UseSSE >= 1 && UseAVX == 0); 5460 match(Set dst (MulReductionVF dst src2)); 5461 effect(TEMP dst, TEMP tmp); 5462 format %{ "mulss $dst,$src2\n\t" 5463 "pshufd $tmp,$src2,0x01\n\t" 5464 "mulss $dst,$tmp\n\t" 5465 "pshufd $tmp,$src2,0x02\n\t" 5466 "mulss $dst,$tmp\n\t" 5467 "pshufd $tmp,$src2,0x03\n\t" 5468 "mulss $dst,$tmp\t! mul reduction4F" %} 5469 ins_encode %{ 5470 __ mulss($dst$$XMMRegister, $src2$$XMMRegister); 5471 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5472 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5473 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5474 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5475 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5476 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5477 %} 5478 ins_pipe( pipe_slow ); 5479 %} 5480 5481 instruct rvmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 5482 predicate(UseAVX > 0); 5483 match(Set dst (MulReductionVF dst src2)); 5484 effect(TEMP tmp, TEMP dst); 5485 format %{ "vmulss $dst,$dst,$src2\n\t" 5486 "pshufd $tmp,$src2,0x01\n\t" 5487 "vmulss $dst,$dst,$tmp\n\t" 5488 "pshufd $tmp,$src2,0x02\n\t" 5489 "vmulss $dst,$dst,$tmp\n\t" 5490 "pshufd $tmp,$src2,0x03\n\t" 5491 "vmulss $dst,$dst,$tmp\t! 
mul reduction4F" %} 5492 ins_encode %{ 5493 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5494 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5495 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5496 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5497 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5498 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5499 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5500 %} 5501 ins_pipe( pipe_slow ); 5502 %} 5503 5504 instruct rvmul8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{ 5505 predicate(UseAVX > 0); 5506 match(Set dst (MulReductionVF dst src2)); 5507 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5508 format %{ "vmulss $dst,$dst,$src2\n\t" 5509 "pshufd $tmp,$src2,0x01\n\t" 5510 "vmulss $dst,$dst,$tmp\n\t" 5511 "pshufd $tmp,$src2,0x02\n\t" 5512 "vmulss $dst,$dst,$tmp\n\t" 5513 "pshufd $tmp,$src2,0x03\n\t" 5514 "vmulss $dst,$dst,$tmp\n\t" 5515 "vextractf128 $tmp2,$src2\n\t" 5516 "vmulss $dst,$dst,$tmp2\n\t" 5517 "pshufd $tmp,$tmp2,0x01\n\t" 5518 "vmulss $dst,$dst,$tmp\n\t" 5519 "pshufd $tmp,$tmp2,0x02\n\t" 5520 "vmulss $dst,$dst,$tmp\n\t" 5521 "pshufd $tmp,$tmp2,0x03\n\t" 5522 "vmulss $dst,$dst,$tmp\t! mul reduction8F" %} 5523 ins_encode %{ 5524 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5525 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5526 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5527 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5528 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5529 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5530 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5531 __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister); 5532 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5533 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5534 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5535 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5536 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5537 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5538 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5539 %} 5540 ins_pipe( pipe_slow ); 5541 %} 5542 5543 instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{ 5544 predicate(UseAVX > 2); 5545 match(Set dst (MulReductionVF dst src2)); 5546 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5547 format %{ "vmulss $dst,$dst,$src2\n\t" 5548 "pshufd $tmp,$src2,0x01\n\t" 5549 "vmulss $dst,$dst,$tmp\n\t" 5550 "pshufd $tmp,$src2,0x02\n\t" 5551 "vmulss $dst,$dst,$tmp\n\t" 5552 "pshufd $tmp,$src2,0x03\n\t" 5553 "vmulss $dst,$dst,$tmp\n\t" 5554 "vextractf32x4 $tmp2,$src2, 0x1\n\t" 5555 "vmulss $dst,$dst,$tmp2\n\t" 5556 "pshufd $tmp,$tmp2,0x01\n\t" 5557 "vmulss $dst,$dst,$tmp\n\t" 5558 "pshufd $tmp,$tmp2,0x02\n\t" 5559 "vmulss $dst,$dst,$tmp\n\t" 5560 "pshufd $tmp,$tmp2,0x03\n\t" 5561 "vmulss $dst,$dst,$tmp\n\t" 5562 "vextractf32x4 $tmp2,$src2, 0x2\n\t" 5563 "vmulss $dst,$dst,$tmp2\n\t" 5564 "pshufd $tmp,$tmp2,0x01\n\t" 5565 "vmulss $dst,$dst,$tmp\n\t" 5566 "pshufd $tmp,$tmp2,0x02\n\t" 5567 "vmulss $dst,$dst,$tmp\n\t" 5568 "pshufd $tmp,$tmp2,0x03\n\t" 5569 "vmulss $dst,$dst,$tmp\n\t" 5570 "vextractf32x4 $tmp2,$src2, 0x3\n\t" 5571 "vmulss $dst,$dst,$tmp2\n\t" 5572 "pshufd $tmp,$tmp2,0x01\n\t" 5573 "vmulss $dst,$dst,$tmp\n\t" 5574 "pshufd 

instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulss  $dst,$dst,$src2\n\t"
            "pshufd  $tmp,$src2,0x01\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$src2,0x02\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$src2,0x03\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "vextractf32x4  $tmp2,$src2, 0x1\n\t"
            "vmulss  $dst,$dst,$tmp2\n\t"
            "pshufd  $tmp,$tmp2,0x01\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x02\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x03\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "vextractf32x4  $tmp2,$src2, 0x2\n\t"
            "vmulss  $dst,$dst,$tmp2\n\t"
            "pshufd  $tmp,$tmp2,0x01\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x02\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x03\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "vextractf32x4  $tmp2,$src2, 0x3\n\t"
            "vmulss  $dst,$dst,$tmp2\n\t"
            "pshufd  $tmp,$tmp2,0x01\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x02\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x03\n\t"
            "vmulss  $dst,$dst,$tmp\t! mul reduction16F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "mulsd   $dst,$src2\n\t"
            "pshufd  $tmp,$src2,0xE\n\t"
            "mulsd   $dst,$tmp\t! mul reduction2D" %}
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vmulsd  $dst,$dst,$src2\n\t"
            "pshufd  $tmp,$src2,0xE\n\t"
            "vmulsd  $dst,$dst,$tmp\t! mul reduction2D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
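
// For the double reductions, pshufd with shuffle code 0xE selects 32-bit
// lanes {2,3} of the source into the low half of $tmp, i.e. it moves the
// upper double down so a scalar mulsd/vmulsd can reach it. The 2D rules
// above, read as scalar code (illustration only):
//
//   double reduce2d(double dst, const double v[2]) {
//     dst *= v[0];       // mulsd  $dst,$src2
//     double hi = v[1];  // pshufd $tmp,$src2,0xE
//     dst *= hi;         // mulsd  $dst,$tmp
//     return dst;
//   }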

instruct rvmul4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulsd  $dst,$dst,$src2\n\t"
            "pshufd  $tmp,$src2,0xE\n\t"
            "vmulsd  $dst,$dst,$tmp\n\t"
            "vextractf128  $tmp2,$src2\n\t"
            "vmulsd  $dst,$dst,$tmp2\n\t"
            "pshufd  $tmp,$tmp2,0xE\n\t"
            "vmulsd  $dst,$dst,$tmp\t! mul reduction4D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulsd  $dst,$dst,$src2\n\t"
            "pshufd  $tmp,$src2,0xE\n\t"
            "vmulsd  $dst,$dst,$tmp\n\t"
            "vextractf32x4  $tmp2,$src2, 0x1\n\t"
            "vmulsd  $dst,$dst,$tmp2\n\t"
            "pshufd  $tmp,$tmp2,0xE\n\t"
            "vmulsd  $dst,$dst,$tmp\n\t"
            "vextractf32x4  $tmp2,$src2, 0x2\n\t"
            "vmulsd  $dst,$dst,$tmp2\n\t"
            "pshufd  $tmp,$tmp2,0xE\n\t"
            "vmulsd  $dst,$dst,$tmp\n\t"
            "vextractf32x4  $tmp2,$src2, 0x3\n\t"
            "vmulsd  $dst,$dst,$tmp2\n\t"
            "pshufd  $tmp,$tmp2,0xE\n\t"
            "vmulsd  $dst,$dst,$tmp\t! mul reduction8D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// ====================VECTOR ARITHMETIC=======================================

// --------------------------------- ADD --------------------------------------
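
// In the rules below, the vector_len argument passed down to the assembler
// encodes the operand width: 0 selects 128-bit (XMM), 1 selects 256-bit
// (YMM) and 2 selects 512-bit (ZMM) operands. A sketch of the mapping
// (hypothetical helper, for illustration only):
//
//   int vector_len_for(unsigned vlen_in_bytes) {
//     switch (vlen_in_bytes) {
//       case 16: return 0;  // 128-bit
//       case 32: return 1;  // 256-bit
//       default: return 2;  // 512-bit (64 bytes)
//     }
//   }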

// Bytes vector add
instruct vadd4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst src));
  format %{ "paddb   $dst,$src\t! add packed4B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb  $dst,$dst,$src2\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb  $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
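
// As the packed4B rules above illustrate, each promotable byte/short rule
// comes in three flavors: _avx for AVX/AVX2-only parts, _evex for AVX-512
// parts with the BW extension (which byte and word operations need), and
// _evex_special for AVX-512 parts without BW, where the rule degrades to
// updating $dst in place and claims an extra TEMP operand. Judging by the
// predicate names, the three tests partition the feature space (sketch
// only; see VM_Version for the real definitions):
//
//   bool avx256only = supports_avx()  && !supports_evex();
//   bool evex_bw    = supports_evex() &&  supports_avx512bw();
//   bool evex_nobw  = supports_evex() && !supports_avx512bw();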

instruct vadd8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst src));
  format %{ "paddb   $dst,$src\t! add packed8B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb  $dst,$dst,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb  $dst,$src,$mem\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst src));
  format %{ "paddb   $dst,$src\t! add packed16B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb  $dst,$dst,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb  $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb  $dst,$dst,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb  $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector add
instruct vadd2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst src));
  format %{ "paddw   $dst,$src\t! add packed2S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw  $dst,$src1,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw  $dst,$src1,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw  $dst,$dst,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw  $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw  $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw  $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst src));
  format %{ "paddw   $dst,$src\t! add packed4S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw  $dst,$src1,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw  $dst,$src1,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw  $dst,$dst,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw  $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw  $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw  $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src));
  format %{ "paddw   $dst,$src\t! add packed8S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw  $dst,$src1,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw  $dst,$src1,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw  $dst,$dst,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw  $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw  $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw  $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw  $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw  $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw  $dst,$dst,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw  $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw  $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw  $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw  $dst,$src1,$src2\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw  $dst,$src,$mem\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector add
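
// Integer, long, float and double lanes are covered by base AVX/AVX2 and
// AVX-512F, so the rules from here on need no BW split: a plain UseAVX
// predicate per vector width suffices (UseAVX > 0 for 128-bit, UseAVX > 1
// for the 256-bit integer forms that need AVX2, UseAVX > 2 for 512-bit).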

instruct vadd2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst src));
  format %{ "paddd   $dst,$src\t! add packed2I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd  $dst,$src1,$src2\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd  $dst,$src,$mem\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst src));
  format %{ "paddd   $dst,$src\t! add packed4I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd  $dst,$src1,$src2\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd  $dst,$src,$mem\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd  $dst,$src1,$src2\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd  $dst,$src,$mem\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd  $dst,$src1,$src2\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd  $dst,$src,$mem\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector add
instruct vadd2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL dst src));
  format %{ "paddq   $dst,$src\t! add packed2L" %}
  ins_encode %{
    __ paddq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq  $dst,$src1,$src2\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq  $dst,$src,$mem\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq  $dst,$src1,$src2\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq  $dst,$src,$mem\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq  $dst,$src1,$src2\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq  $dst,$src,$mem\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector add
instruct vadd2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF dst src));
  format %{ "addps   $dst,$src\t! add packed2F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps  $dst,$src1,$src2\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps  $dst,$src,$mem\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF dst src));
  format %{ "addps   $dst,$src\t! add packed4F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps  $dst,$src1,$src2\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps  $dst,$src,$mem\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps  $dst,$src1,$src2\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps  $dst,$src,$mem\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps  $dst,$src1,$src2\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps  $dst,$src,$mem\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector add
instruct vadd2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD dst src));
  format %{ "addpd   $dst,$src\t! add packed2D" %}
  ins_encode %{
    __ addpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd  $dst,$src1,$src2\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd  $dst,$src,$mem\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd  $dst,$src1,$src2\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd  $dst,$src,$mem\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd  $dst,$src1,$src2\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd  $dst,$src,$mem\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- SUB --------------------------------------
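
// The SUB rules mirror the ADD rules above (psubb/vpsubb and friends),
// including the _avx/_evex/_evex_special split for byte and short lanes.
// Unlike addition, subtraction is not commutative, so operand order in the
// match rules matters: the (LoadVector mem) forms compute $src - mem,
// never mem - $src.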

// Bytes vector sub
instruct vsub4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src));
  format %{ "psubb   $dst,$src\t! sub packed4B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb  $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb  $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb  $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src));
  format %{ "psubb   $dst,$src\t! sub packed8B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb  $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb  $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb  $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst src));
  format %{ "psubb   $dst,$src\t! sub packed16B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb  $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb  $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb  $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb  $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb  $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb  $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb  $dst,$src,$mem\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector sub
instruct vsub2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst src));
  format %{ "psubw   $dst,$src\t! sub packed2S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw  $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw  $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
sub packed2S" %} 7085 ins_encode %{ 7086 int vector_len = 0; 7087 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7088 %} 7089 ins_pipe( pipe_slow ); 7090 %} 7091 7092 instruct vsub2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{ 7093 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 7094 match(Set dst (SubVS dst src2)); 7095 effect(TEMP src1); 7096 format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %} 7097 ins_encode %{ 7098 int vector_len = 0; 7099 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7100 %} 7101 ins_pipe( pipe_slow ); 7102 %} 7103 7104 instruct vsub2S_mem_avx(vecS dst, vecS src, memory mem) %{ 7105 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 2); 7106 match(Set dst (SubVS src (LoadVector mem))); 7107 format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %} 7108 ins_encode %{ 7109 int vector_len = 0; 7110 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7111 %} 7112 ins_pipe( pipe_slow ); 7113 %} 7114 7115 instruct vsub2S_mem_evex(vecS dst, vecS src, memory mem) %{ 7116 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 7117 match(Set dst (SubVS src (LoadVector mem))); 7118 format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %} 7119 ins_encode %{ 7120 int vector_len = 0; 7121 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7122 %} 7123 ins_pipe( pipe_slow ); 7124 %} 7125 7126 instruct vsub2S_mem_evex_special(vecS dst, vecS src, memory mem) %{ 7127 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 7128 match(Set dst (SubVS dst (LoadVector mem))); 7129 effect(TEMP src); 7130 format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %} 7131 ins_encode %{ 7132 int vector_len = 0; 7133 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7134 %} 7135 ins_pipe( pipe_slow ); 7136 %} 7137 7138 instruct vsub4S(vecD dst, vecD src) %{ 7139 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 7140 match(Set dst (SubVS dst src)); 7141 format %{ "psubw $dst,$src\t! sub packed4S" %} 7142 ins_encode %{ 7143 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 7144 %} 7145 ins_pipe( pipe_slow ); 7146 %} 7147 7148 instruct vsub4S_reg_avx(vecD dst, vecD src1, vecD src2) %{ 7149 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4); 7150 match(Set dst (SubVS src1 src2)); 7151 format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %} 7152 ins_encode %{ 7153 int vector_len = 0; 7154 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7155 %} 7156 ins_pipe( pipe_slow ); 7157 %} 7158 7159 instruct vsub4S_reg_evex(vecD dst, vecD src1, vecD src2) %{ 7160 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 7161 match(Set dst (SubVS src1 src2)); 7162 format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %} 7163 ins_encode %{ 7164 int vector_len = 0; 7165 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7166 %} 7167 ins_pipe( pipe_slow ); 7168 %} 7169 7170 instruct vsub4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ 7171 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 7172 match(Set dst (SubVS dst src2)); 7173 effect(TEMP src1); 7174 format %{ "vpsubw $dst,$src1,$src2\t! 
sub packed4S" %} 7175 ins_encode %{ 7176 int vector_len = 0; 7177 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7178 %} 7179 ins_pipe( pipe_slow ); 7180 %} 7181 7182 instruct vsub4S_mem_avx(vecD dst, vecD src, memory mem) %{ 7183 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4); 7184 match(Set dst (SubVS src (LoadVector mem))); 7185 format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %} 7186 ins_encode %{ 7187 int vector_len = 0; 7188 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7189 %} 7190 ins_pipe( pipe_slow ); 7191 %} 7192 7193 instruct vsub4S_mem_evex(vecD dst, vecD src, memory mem) %{ 7194 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 7195 match(Set dst (SubVS src (LoadVector mem))); 7196 format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %} 7197 ins_encode %{ 7198 int vector_len = 0; 7199 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7200 %} 7201 ins_pipe( pipe_slow ); 7202 %} 7203 7204 instruct vsub4S_mem_evex_special(vecD dst, vecD src, memory mem) %{ 7205 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 7206 match(Set dst (SubVS dst (LoadVector mem))); 7207 effect(TEMP src); 7208 format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %} 7209 ins_encode %{ 7210 int vector_len = 0; 7211 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7212 %} 7213 ins_pipe( pipe_slow ); 7214 %} 7215 7216 instruct vsub8S(vecX dst, vecX src) %{ 7217 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 7218 match(Set dst (SubVS dst src)); 7219 format %{ "psubw $dst,$src\t! sub packed8S" %} 7220 ins_encode %{ 7221 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 7222 %} 7223 ins_pipe( pipe_slow ); 7224 %} 7225 7226 instruct vsub8S_reg_avx(vecX dst, vecX src1, vecX src2) %{ 7227 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8); 7228 match(Set dst (SubVS src1 src2)); 7229 format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} 7230 ins_encode %{ 7231 int vector_len = 0; 7232 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7233 %} 7234 ins_pipe( pipe_slow ); 7235 %} 7236 7237 instruct vsub8S_reg_evex(vecX dst, vecX src1, vecX src2) %{ 7238 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 7239 match(Set dst (SubVS src1 src2)); 7240 format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} 7241 ins_encode %{ 7242 int vector_len = 0; 7243 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7244 %} 7245 ins_pipe( pipe_slow ); 7246 %} 7247 7248 instruct vsub8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ 7249 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 7250 match(Set dst (SubVS dst src2)); 7251 effect(TEMP src1); 7252 format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} 7253 ins_encode %{ 7254 int vector_len = 0; 7255 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7256 %} 7257 ins_pipe( pipe_slow ); 7258 %} 7259 7260 instruct vsub8S_mem_avx(vecX dst, vecX src, memory mem) %{ 7261 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8); 7262 match(Set dst (SubVS src (LoadVector mem))); 7263 format %{ "vpsubw $dst,$src,$mem\t! 
sub packed8S" %} 7264 ins_encode %{ 7265 int vector_len = 0; 7266 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7267 %} 7268 ins_pipe( pipe_slow ); 7269 %} 7270 7271 instruct vsub8S_mem_evex(vecX dst, vecX src, memory mem) %{ 7272 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 7273 match(Set dst (SubVS src (LoadVector mem))); 7274 format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %} 7275 ins_encode %{ 7276 int vector_len = 0; 7277 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7278 %} 7279 ins_pipe( pipe_slow ); 7280 %} 7281 7282 instruct vsub8S_mem_evex_special(vecX dst, vecX src, memory mem) %{ 7283 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 7284 match(Set dst (SubVS dst (LoadVector mem))); 7285 effect(TEMP src); 7286 format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %} 7287 ins_encode %{ 7288 int vector_len = 0; 7289 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7290 %} 7291 ins_pipe( pipe_slow ); 7292 %} 7293 7294 instruct vsub16S_reg_avx(vecY dst, vecY src1, vecY src2) %{ 7295 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 7296 match(Set dst (SubVS src1 src2)); 7297 format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} 7298 ins_encode %{ 7299 int vector_len = 1; 7300 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7301 %} 7302 ins_pipe( pipe_slow ); 7303 %} 7304 7305 instruct vsub16S_reg_evex(vecY dst, vecY src1, vecY src2) %{ 7306 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 7307 match(Set dst (SubVS src1 src2)); 7308 format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} 7309 ins_encode %{ 7310 int vector_len = 1; 7311 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7312 %} 7313 ins_pipe( pipe_slow ); 7314 %} 7315 7316 instruct vsub16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ 7317 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 7318 match(Set dst (SubVS dst src2)); 7319 effect(TEMP src1); 7320 format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} 7321 ins_encode %{ 7322 int vector_len = 1; 7323 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7324 %} 7325 ins_pipe( pipe_slow ); 7326 %} 7327 7328 instruct vsub16S_mem_avx(vecY dst, vecY src, memory mem) %{ 7329 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 7330 match(Set dst (SubVS src (LoadVector mem))); 7331 format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %} 7332 ins_encode %{ 7333 int vector_len = 1; 7334 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7335 %} 7336 ins_pipe( pipe_slow ); 7337 %} 7338 7339 instruct vsub16S_mem_evex(vecY dst, vecY src, memory mem) %{ 7340 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 7341 match(Set dst (SubVS src (LoadVector mem))); 7342 format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %} 7343 ins_encode %{ 7344 int vector_len = 1; 7345 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7346 %} 7347 ins_pipe( pipe_slow ); 7348 %} 7349 7350 instruct vsub16S_mem_evex_special(vecY dst, vecY src, memory mem) %{ 7351 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 7352 match(Set dst (SubVS dst (LoadVector mem))); 7353 effect(TEMP src); 7354 format %{ "vpsubw $dst,$src,$mem\t! 
sub packed16S" %} 7355 ins_encode %{ 7356 int vector_len = 1; 7357 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7358 %} 7359 ins_pipe( pipe_slow ); 7360 %} 7361 7362 instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7363 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 7364 match(Set dst (SubVS src1 src2)); 7365 format %{ "vpsubw $dst,$src1,$src2\t! sub packed32S" %} 7366 ins_encode %{ 7367 int vector_len = 2; 7368 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7369 %} 7370 ins_pipe( pipe_slow ); 7371 %} 7372 7373 instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{ 7374 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 7375 match(Set dst (SubVS src (LoadVector mem))); 7376 format %{ "vpsubw $dst,$src,$mem\t! sub packed32S" %} 7377 ins_encode %{ 7378 int vector_len = 2; 7379 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7380 %} 7381 ins_pipe( pipe_slow ); 7382 %} 7383 7384 // Integers vector sub 7385 instruct vsub2I(vecD dst, vecD src) %{ 7386 predicate(n->as_Vector()->length() == 2); 7387 match(Set dst (SubVI dst src)); 7388 format %{ "psubd $dst,$src\t! sub packed2I" %} 7389 ins_encode %{ 7390 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 7391 %} 7392 ins_pipe( pipe_slow ); 7393 %} 7394 7395 instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{ 7396 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7397 match(Set dst (SubVI src1 src2)); 7398 format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %} 7399 ins_encode %{ 7400 int vector_len = 0; 7401 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7402 %} 7403 ins_pipe( pipe_slow ); 7404 %} 7405 7406 instruct vsub2I_mem(vecD dst, vecD src, memory mem) %{ 7407 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7408 match(Set dst (SubVI src (LoadVector mem))); 7409 format %{ "vpsubd $dst,$src,$mem\t! sub packed2I" %} 7410 ins_encode %{ 7411 int vector_len = 0; 7412 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7413 %} 7414 ins_pipe( pipe_slow ); 7415 %} 7416 7417 instruct vsub4I(vecX dst, vecX src) %{ 7418 predicate(n->as_Vector()->length() == 4); 7419 match(Set dst (SubVI dst src)); 7420 format %{ "psubd $dst,$src\t! sub packed4I" %} 7421 ins_encode %{ 7422 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 7423 %} 7424 ins_pipe( pipe_slow ); 7425 %} 7426 7427 instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{ 7428 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7429 match(Set dst (SubVI src1 src2)); 7430 format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %} 7431 ins_encode %{ 7432 int vector_len = 0; 7433 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7434 %} 7435 ins_pipe( pipe_slow ); 7436 %} 7437 7438 instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{ 7439 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7440 match(Set dst (SubVI src (LoadVector mem))); 7441 format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %} 7442 ins_encode %{ 7443 int vector_len = 0; 7444 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7445 %} 7446 ins_pipe( pipe_slow ); 7447 %} 7448 7449 instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{ 7450 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 7451 match(Set dst (SubVI src1 src2)); 7452 format %{ "vpsubd $dst,$src1,$src2\t! 
sub packed8I" %} 7453 ins_encode %{ 7454 int vector_len = 1; 7455 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7456 %} 7457 ins_pipe( pipe_slow ); 7458 %} 7459 7460 instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{ 7461 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 7462 match(Set dst (SubVI src (LoadVector mem))); 7463 format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %} 7464 ins_encode %{ 7465 int vector_len = 1; 7466 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7467 %} 7468 ins_pipe( pipe_slow ); 7469 %} 7470 7471 instruct vsub16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7472 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7473 match(Set dst (SubVI src1 src2)); 7474 format %{ "vpsubd $dst,$src1,$src2\t! sub packed16I" %} 7475 ins_encode %{ 7476 int vector_len = 2; 7477 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7478 %} 7479 ins_pipe( pipe_slow ); 7480 %} 7481 7482 instruct vsub16I_mem(vecZ dst, vecZ src, memory mem) %{ 7483 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7484 match(Set dst (SubVI src (LoadVector mem))); 7485 format %{ "vpsubd $dst,$src,$mem\t! sub packed16I" %} 7486 ins_encode %{ 7487 int vector_len = 2; 7488 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7489 %} 7490 ins_pipe( pipe_slow ); 7491 %} 7492 7493 // Longs vector sub 7494 instruct vsub2L(vecX dst, vecX src) %{ 7495 predicate(n->as_Vector()->length() == 2); 7496 match(Set dst (SubVL dst src)); 7497 format %{ "psubq $dst,$src\t! sub packed2L" %} 7498 ins_encode %{ 7499 __ psubq($dst$$XMMRegister, $src$$XMMRegister); 7500 %} 7501 ins_pipe( pipe_slow ); 7502 %} 7503 7504 instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{ 7505 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7506 match(Set dst (SubVL src1 src2)); 7507 format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %} 7508 ins_encode %{ 7509 int vector_len = 0; 7510 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7511 %} 7512 ins_pipe( pipe_slow ); 7513 %} 7514 7515 instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{ 7516 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7517 match(Set dst (SubVL src (LoadVector mem))); 7518 format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %} 7519 ins_encode %{ 7520 int vector_len = 0; 7521 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7522 %} 7523 ins_pipe( pipe_slow ); 7524 %} 7525 7526 instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{ 7527 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 7528 match(Set dst (SubVL src1 src2)); 7529 format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %} 7530 ins_encode %{ 7531 int vector_len = 1; 7532 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7533 %} 7534 ins_pipe( pipe_slow ); 7535 %} 7536 7537 instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{ 7538 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 7539 match(Set dst (SubVL src (LoadVector mem))); 7540 format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %} 7541 ins_encode %{ 7542 int vector_len = 1; 7543 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7544 %} 7545 ins_pipe( pipe_slow ); 7546 %} 7547 7548 instruct vsub8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7549 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 7550 match(Set dst (SubVL src1 src2)); 7551 format %{ "vpsubq $dst,$src1,$src2\t! 
sub packed8L" %} 7552 ins_encode %{ 7553 int vector_len = 2; 7554 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7555 %} 7556 ins_pipe( pipe_slow ); 7557 %} 7558 7559 instruct vsub8L_mem(vecZ dst, vecZ src, memory mem) %{ 7560 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 7561 match(Set dst (SubVL src (LoadVector mem))); 7562 format %{ "vpsubq $dst,$src,$mem\t! sub packed8L" %} 7563 ins_encode %{ 7564 int vector_len = 2; 7565 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7566 %} 7567 ins_pipe( pipe_slow ); 7568 %} 7569 7570 // Floats vector sub 7571 instruct vsub2F(vecD dst, vecD src) %{ 7572 predicate(n->as_Vector()->length() == 2); 7573 match(Set dst (SubVF dst src)); 7574 format %{ "subps $dst,$src\t! sub packed2F" %} 7575 ins_encode %{ 7576 __ subps($dst$$XMMRegister, $src$$XMMRegister); 7577 %} 7578 ins_pipe( pipe_slow ); 7579 %} 7580 7581 instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{ 7582 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7583 match(Set dst (SubVF src1 src2)); 7584 format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %} 7585 ins_encode %{ 7586 int vector_len = 0; 7587 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7588 %} 7589 ins_pipe( pipe_slow ); 7590 %} 7591 7592 instruct vsub2F_mem(vecD dst, vecD src, memory mem) %{ 7593 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7594 match(Set dst (SubVF src (LoadVector mem))); 7595 format %{ "vsubps $dst,$src,$mem\t! sub packed2F" %} 7596 ins_encode %{ 7597 int vector_len = 0; 7598 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7599 %} 7600 ins_pipe( pipe_slow ); 7601 %} 7602 7603 instruct vsub4F(vecX dst, vecX src) %{ 7604 predicate(n->as_Vector()->length() == 4); 7605 match(Set dst (SubVF dst src)); 7606 format %{ "subps $dst,$src\t! sub packed4F" %} 7607 ins_encode %{ 7608 __ subps($dst$$XMMRegister, $src$$XMMRegister); 7609 %} 7610 ins_pipe( pipe_slow ); 7611 %} 7612 7613 instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{ 7614 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7615 match(Set dst (SubVF src1 src2)); 7616 format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %} 7617 ins_encode %{ 7618 int vector_len = 0; 7619 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7620 %} 7621 ins_pipe( pipe_slow ); 7622 %} 7623 7624 instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{ 7625 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7626 match(Set dst (SubVF src (LoadVector mem))); 7627 format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %} 7628 ins_encode %{ 7629 int vector_len = 0; 7630 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7631 %} 7632 ins_pipe( pipe_slow ); 7633 %} 7634 7635 instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{ 7636 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7637 match(Set dst (SubVF src1 src2)); 7638 format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %} 7639 ins_encode %{ 7640 int vector_len = 1; 7641 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7642 %} 7643 ins_pipe( pipe_slow ); 7644 %} 7645 7646 instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{ 7647 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7648 match(Set dst (SubVF src (LoadVector mem))); 7649 format %{ "vsubps $dst,$src,$mem\t! 
sub packed8F" %} 7650 ins_encode %{ 7651 int vector_len = 1; 7652 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7653 %} 7654 ins_pipe( pipe_slow ); 7655 %} 7656 7657 instruct vsub16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7658 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7659 match(Set dst (SubVF src1 src2)); 7660 format %{ "vsubps $dst,$src1,$src2\t! sub packed16F" %} 7661 ins_encode %{ 7662 int vector_len = 2; 7663 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7664 %} 7665 ins_pipe( pipe_slow ); 7666 %} 7667 7668 instruct vsub16F_mem(vecZ dst, vecZ src, memory mem) %{ 7669 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7670 match(Set dst (SubVF src (LoadVector mem))); 7671 format %{ "vsubps $dst,$src,$mem\t! sub packed16F" %} 7672 ins_encode %{ 7673 int vector_len = 2; 7674 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7675 %} 7676 ins_pipe( pipe_slow ); 7677 %} 7678 7679 // Doubles vector sub 7680 instruct vsub2D(vecX dst, vecX src) %{ 7681 predicate(n->as_Vector()->length() == 2); 7682 match(Set dst (SubVD dst src)); 7683 format %{ "subpd $dst,$src\t! sub packed2D" %} 7684 ins_encode %{ 7685 __ subpd($dst$$XMMRegister, $src$$XMMRegister); 7686 %} 7687 ins_pipe( pipe_slow ); 7688 %} 7689 7690 instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{ 7691 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7692 match(Set dst (SubVD src1 src2)); 7693 format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %} 7694 ins_encode %{ 7695 int vector_len = 0; 7696 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7697 %} 7698 ins_pipe( pipe_slow ); 7699 %} 7700 7701 instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{ 7702 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7703 match(Set dst (SubVD src (LoadVector mem))); 7704 format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %} 7705 ins_encode %{ 7706 int vector_len = 0; 7707 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7708 %} 7709 ins_pipe( pipe_slow ); 7710 %} 7711 7712 instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{ 7713 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7714 match(Set dst (SubVD src1 src2)); 7715 format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %} 7716 ins_encode %{ 7717 int vector_len = 1; 7718 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7719 %} 7720 ins_pipe( pipe_slow ); 7721 %} 7722 7723 instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{ 7724 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7725 match(Set dst (SubVD src (LoadVector mem))); 7726 format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %} 7727 ins_encode %{ 7728 int vector_len = 1; 7729 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7730 %} 7731 ins_pipe( pipe_slow ); 7732 %} 7733 7734 instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7735 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 7736 match(Set dst (SubVD src1 src2)); 7737 format %{ "vsubpd $dst,$src1,$src2\t! sub packed8D" %} 7738 ins_encode %{ 7739 int vector_len = 2; 7740 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7741 %} 7742 ins_pipe( pipe_slow ); 7743 %} 7744 7745 instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{ 7746 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 7747 match(Set dst (SubVD src (LoadVector mem))); 7748 format %{ "vsubpd $dst,$src,$mem\t! 
sub packed8D" %} 7749 ins_encode %{ 7750 int vector_len = 2; 7751 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7752 %} 7753 ins_pipe( pipe_slow ); 7754 %} 7755 7756 // --------------------------------- MUL -------------------------------------- 7757 7758 // Shorts/Chars vector mul 7759 instruct vmul2S(vecS dst, vecS src) %{ 7760 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 7761 match(Set dst (MulVS dst src)); 7762 format %{ "pmullw $dst,$src\t! mul packed2S" %} 7763 ins_encode %{ 7764 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 7765 %} 7766 ins_pipe( pipe_slow ); 7767 %} 7768 7769 instruct vmul2S_reg_avx(vecS dst, vecS src1, vecS src2) %{ 7770 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 2); 7771 match(Set dst (MulVS src1 src2)); 7772 format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} 7773 ins_encode %{ 7774 int vector_len = 0; 7775 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7776 %} 7777 ins_pipe( pipe_slow ); 7778 %} 7779 7780 instruct vmul2S_reg_evex(vecS dst, vecS src1, vecS src2) %{ 7781 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 7782 match(Set dst (MulVS src1 src2)); 7783 format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} 7784 ins_encode %{ 7785 int vector_len = 0; 7786 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7787 %} 7788 ins_pipe( pipe_slow ); 7789 %} 7790 7791 instruct vmul2S_evex_special(vecS dst, vecS src1, vecS src2) %{ 7792 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 7793 match(Set dst (MulVS dst src2)); 7794 effect(TEMP src1); 7795 format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} 7796 ins_encode %{ 7797 int vector_len = 0; 7798 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7799 %} 7800 ins_pipe( pipe_slow ); 7801 %} 7802 7803 instruct vmul2S_mem_avx(vecS dst, vecS src, memory mem) %{ 7804 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 2); 7805 match(Set dst (MulVS src (LoadVector mem))); 7806 format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} 7807 ins_encode %{ 7808 int vector_len = 0; 7809 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7810 %} 7811 ins_pipe( pipe_slow ); 7812 %} 7813 7814 instruct vmul2S_mem_evex(vecS dst, vecS src, memory mem) %{ 7815 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 7816 match(Set dst (MulVS src (LoadVector mem))); 7817 format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} 7818 ins_encode %{ 7819 int vector_len = 0; 7820 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7821 %} 7822 ins_pipe( pipe_slow ); 7823 %} 7824 7825 instruct vmul2S_mem_evex_special(vecS dst, vecS src, memory mem) %{ 7826 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 7827 match(Set dst (MulVS dst (LoadVector mem))); 7828 effect(TEMP src); 7829 format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} 7830 ins_encode %{ 7831 int vector_len = 0; 7832 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7833 %} 7834 ins_pipe( pipe_slow ); 7835 %} 7836 7837 instruct vmul4S(vecD dst, vecD src) %{ 7838 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 7839 match(Set dst (MulVS dst src)); 7840 format %{ "pmullw $dst,$src\t! 
mul packed4S" %} 7841 ins_encode %{ 7842 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 7843 %} 7844 ins_pipe( pipe_slow ); 7845 %} 7846 7847 instruct vmul4S_reg_avx(vecD dst, vecD src1, vecD src2) %{ 7848 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4); 7849 match(Set dst (MulVS src1 src2)); 7850 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 7851 ins_encode %{ 7852 int vector_len = 0; 7853 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7854 %} 7855 ins_pipe( pipe_slow ); 7856 %} 7857 7858 instruct vmul4S_reg_evex(vecD dst, vecD src1, vecD src2) %{ 7859 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 7860 match(Set dst (MulVS src1 src2)); 7861 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 7862 ins_encode %{ 7863 int vector_len = 0; 7864 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7865 %} 7866 ins_pipe( pipe_slow ); 7867 %} 7868 7869 instruct vmul4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ 7870 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 7871 match(Set dst (MulVS dst src2)); 7872 effect(TEMP src1); 7873 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 7874 ins_encode %{ 7875 int vector_len = 0; 7876 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7877 %} 7878 ins_pipe( pipe_slow ); 7879 %} 7880 7881 instruct vmul4S_mem_avx(vecD dst, vecD src, memory mem) %{ 7882 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4); 7883 match(Set dst (MulVS src (LoadVector mem))); 7884 format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} 7885 ins_encode %{ 7886 int vector_len = 0; 7887 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7888 %} 7889 ins_pipe( pipe_slow ); 7890 %} 7891 7892 instruct vmul4S_mem_evex(vecD dst, vecD src, memory mem) %{ 7893 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 7894 match(Set dst (MulVS src (LoadVector mem))); 7895 format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} 7896 ins_encode %{ 7897 int vector_len = 0; 7898 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7899 %} 7900 ins_pipe( pipe_slow ); 7901 %} 7902 7903 instruct vmul4S_mem_evex_special(vecD dst, vecD src, memory mem) %{ 7904 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 7905 match(Set dst (MulVS dst (LoadVector mem))); 7906 effect(TEMP src); 7907 format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} 7908 ins_encode %{ 7909 int vector_len = 0; 7910 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7911 %} 7912 ins_pipe( pipe_slow ); 7913 %} 7914 7915 instruct vmul8S(vecX dst, vecX src) %{ 7916 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 7917 match(Set dst (MulVS dst src)); 7918 format %{ "pmullw $dst,$src\t! mul packed8S" %} 7919 ins_encode %{ 7920 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 7921 %} 7922 ins_pipe( pipe_slow ); 7923 %} 7924 7925 instruct vmul8S_reg_avx(vecX dst, vecX src1, vecX src2) %{ 7926 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8); 7927 match(Set dst (MulVS src1 src2)); 7928 format %{ "vpmullw $dst,$src1,$src2\t! 
mul packed8S" %} 7929 ins_encode %{ 7930 int vector_len = 0; 7931 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7932 %} 7933 ins_pipe( pipe_slow ); 7934 %} 7935 7936 instruct vmul8S_reg_evex(vecX dst, vecX src1, vecX src2) %{ 7937 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 7938 match(Set dst (MulVS src1 src2)); 7939 format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} 7940 ins_encode %{ 7941 int vector_len = 0; 7942 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7943 %} 7944 ins_pipe( pipe_slow ); 7945 %} 7946 7947 instruct vmul8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ 7948 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 7949 match(Set dst (MulVS dst src2)); 7950 effect(TEMP src1); 7951 format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} 7952 ins_encode %{ 7953 int vector_len = 0; 7954 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7955 %} 7956 ins_pipe( pipe_slow ); 7957 %} 7958 7959 instruct vmul8S_mem_avx(vecX dst, vecX src, memory mem) %{ 7960 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8); 7961 match(Set dst (MulVS src (LoadVector mem))); 7962 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} 7963 ins_encode %{ 7964 int vector_len = 0; 7965 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7966 %} 7967 ins_pipe( pipe_slow ); 7968 %} 7969 7970 instruct vmul8S_mem_evex(vecX dst, vecX src, memory mem) %{ 7971 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 7972 match(Set dst (MulVS src (LoadVector mem))); 7973 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} 7974 ins_encode %{ 7975 int vector_len = 0; 7976 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7977 %} 7978 ins_pipe( pipe_slow ); 7979 %} 7980 7981 instruct vmul8S_mem_evex_special(vecX dst, vecX src, memory mem) %{ 7982 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 7983 match(Set dst (MulVS dst (LoadVector mem))); 7984 effect(TEMP src); 7985 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} 7986 ins_encode %{ 7987 int vector_len = 0; 7988 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7989 %} 7990 ins_pipe( pipe_slow ); 7991 %} 7992 7993 instruct vmul16S_reg_avx(vecY dst, vecY src1, vecY src2) %{ 7994 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 7995 match(Set dst (MulVS src1 src2)); 7996 format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} 7997 ins_encode %{ 7998 int vector_len = 1; 7999 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8000 %} 8001 ins_pipe( pipe_slow ); 8002 %} 8003 8004 instruct vmul16S_reg_evex(vecY dst, vecY src1, vecY src2) %{ 8005 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 8006 match(Set dst (MulVS src1 src2)); 8007 format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} 8008 ins_encode %{ 8009 int vector_len = 1; 8010 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8011 %} 8012 ins_pipe( pipe_slow ); 8013 %} 8014 8015 instruct vmul16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ 8016 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 8017 match(Set dst (MulVS dst src2)); 8018 effect(TEMP src1); 8019 format %{ "vpmullw $dst,$src1,$src2\t! 
mul packed16S" %} 8020 ins_encode %{ 8021 int vector_len = 1; 8022 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8023 %} 8024 ins_pipe( pipe_slow ); 8025 %} 8026 8027 instruct vmul16S_mem_avx(vecY dst, vecY src, memory mem) %{ 8028 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 8029 match(Set dst (MulVS src (LoadVector mem))); 8030 format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} 8031 ins_encode %{ 8032 int vector_len = 1; 8033 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8034 %} 8035 ins_pipe( pipe_slow ); 8036 %} 8037 8038 instruct vmul16S_mem_evex(vecY dst, vecY src, memory mem) %{ 8039 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 8040 match(Set dst (MulVS src (LoadVector mem))); 8041 format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} 8042 ins_encode %{ 8043 int vector_len = 1; 8044 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8045 %} 8046 ins_pipe( pipe_slow ); 8047 %} 8048 8049 instruct vmul16S_mem_evex_special(vecY dst, vecY src, memory mem) %{ 8050 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 8051 match(Set dst (MulVS dst (LoadVector mem))); 8052 effect(TEMP src); 8053 format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} 8054 ins_encode %{ 8055 int vector_len = 1; 8056 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8057 %} 8058 ins_pipe( pipe_slow ); 8059 %} 8060 8061 instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ 8062 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 8063 match(Set dst (MulVS src1 src2)); 8064 format %{ "vpmullw $dst,$src1,$src2\t! mul packed32S" %} 8065 ins_encode %{ 8066 int vector_len = 2; 8067 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8068 %} 8069 ins_pipe( pipe_slow ); 8070 %} 8071 8072 instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{ 8073 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 8074 match(Set dst (MulVS src (LoadVector mem))); 8075 format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %} 8076 ins_encode %{ 8077 int vector_len = 2; 8078 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8079 %} 8080 ins_pipe( pipe_slow ); 8081 %} 8082 8083 // Integers vector mul (sse4_1) 8084 instruct vmul2I(vecD dst, vecD src) %{ 8085 predicate(UseSSE > 3 && n->as_Vector()->length() == 2); 8086 match(Set dst (MulVI dst src)); 8087 format %{ "pmulld $dst,$src\t! mul packed2I" %} 8088 ins_encode %{ 8089 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 8090 %} 8091 ins_pipe( pipe_slow ); 8092 %} 8093 8094 instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{ 8095 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8096 match(Set dst (MulVI src1 src2)); 8097 format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %} 8098 ins_encode %{ 8099 int vector_len = 0; 8100 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8101 %} 8102 ins_pipe( pipe_slow ); 8103 %} 8104 8105 instruct vmul2I_mem(vecD dst, vecD src, memory mem) %{ 8106 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8107 match(Set dst (MulVI src (LoadVector mem))); 8108 format %{ "vpmulld $dst,$src,$mem\t! 
mul packed2I" %} 8109 ins_encode %{ 8110 int vector_len = 0; 8111 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8112 %} 8113 ins_pipe( pipe_slow ); 8114 %} 8115 8116 instruct vmul4I(vecX dst, vecX src) %{ 8117 predicate(UseSSE > 3 && n->as_Vector()->length() == 4); 8118 match(Set dst (MulVI dst src)); 8119 format %{ "pmulld $dst,$src\t! mul packed4I" %} 8120 ins_encode %{ 8121 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 8122 %} 8123 ins_pipe( pipe_slow ); 8124 %} 8125 8126 instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{ 8127 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8128 match(Set dst (MulVI src1 src2)); 8129 format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %} 8130 ins_encode %{ 8131 int vector_len = 0; 8132 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8133 %} 8134 ins_pipe( pipe_slow ); 8135 %} 8136 8137 instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{ 8138 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8139 match(Set dst (MulVI src (LoadVector mem))); 8140 format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %} 8141 ins_encode %{ 8142 int vector_len = 0; 8143 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8144 %} 8145 ins_pipe( pipe_slow ); 8146 %} 8147 8148 instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{ 8149 predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq()); 8150 match(Set dst (MulVL src1 src2)); 8151 format %{ "vpmullq $dst,$src1,$src2\t! mul packed2L" %} 8152 ins_encode %{ 8153 int vector_len = 0; 8154 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8155 %} 8156 ins_pipe( pipe_slow ); 8157 %} 8158 8159 instruct vmul2L_mem(vecX dst, vecX src, memory mem) %{ 8160 predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq()); 8161 match(Set dst (MulVL src (LoadVector mem))); 8162 format %{ "vpmullq $dst,$src,$mem\t! mul packed2L" %} 8163 ins_encode %{ 8164 int vector_len = 0; 8165 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8166 %} 8167 ins_pipe( pipe_slow ); 8168 %} 8169 8170 instruct vmul4L_reg(vecY dst, vecY src1, vecY src2) %{ 8171 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq()); 8172 match(Set dst (MulVL src1 src2)); 8173 format %{ "vpmullq $dst,$src1,$src2\t! mul packed4L" %} 8174 ins_encode %{ 8175 int vector_len = 1; 8176 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8177 %} 8178 ins_pipe( pipe_slow ); 8179 %} 8180 8181 instruct vmul4L_mem(vecY dst, vecY src, memory mem) %{ 8182 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq()); 8183 match(Set dst (MulVL src (LoadVector mem))); 8184 format %{ "vpmullq $dst,$src,$mem\t! mul packed4L" %} 8185 ins_encode %{ 8186 int vector_len = 1; 8187 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8188 %} 8189 ins_pipe( pipe_slow ); 8190 %} 8191 8192 instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ 8193 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq()); 8194 match(Set dst (MulVL src1 src2)); 8195 format %{ "vpmullq $dst,$src1,$src2\t! 
mul packed8L" %} 8196 ins_encode %{ 8197 int vector_len = 2; 8198 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8199 %} 8200 ins_pipe( pipe_slow ); 8201 %} 8202 8203 instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{ 8204 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq()); 8205 match(Set dst (MulVL src (LoadVector mem))); 8206 format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %} 8207 ins_encode %{ 8208 int vector_len = 2; 8209 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8210 %} 8211 ins_pipe( pipe_slow ); 8212 %} 8213 8214 instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{ 8215 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 8216 match(Set dst (MulVI src1 src2)); 8217 format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %} 8218 ins_encode %{ 8219 int vector_len = 1; 8220 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8221 %} 8222 ins_pipe( pipe_slow ); 8223 %} 8224 8225 instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{ 8226 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 8227 match(Set dst (MulVI src (LoadVector mem))); 8228 format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %} 8229 ins_encode %{ 8230 int vector_len = 1; 8231 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8232 %} 8233 ins_pipe( pipe_slow ); 8234 %} 8235 8236 instruct vmul16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ 8237 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8238 match(Set dst (MulVI src1 src2)); 8239 format %{ "vpmulld $dst,$src1,$src2\t! mul packed16I" %} 8240 ins_encode %{ 8241 int vector_len = 2; 8242 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8243 %} 8244 ins_pipe( pipe_slow ); 8245 %} 8246 8247 instruct vmul16I_mem(vecZ dst, vecZ src, memory mem) %{ 8248 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8249 match(Set dst (MulVI src (LoadVector mem))); 8250 format %{ "vpmulld $dst,$src,$mem\t! mul packed16I" %} 8251 ins_encode %{ 8252 int vector_len = 2; 8253 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8254 %} 8255 ins_pipe( pipe_slow ); 8256 %} 8257 8258 // Floats vector mul 8259 instruct vmul2F(vecD dst, vecD src) %{ 8260 predicate(n->as_Vector()->length() == 2); 8261 match(Set dst (MulVF dst src)); 8262 format %{ "mulps $dst,$src\t! mul packed2F" %} 8263 ins_encode %{ 8264 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 8265 %} 8266 ins_pipe( pipe_slow ); 8267 %} 8268 8269 instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{ 8270 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8271 match(Set dst (MulVF src1 src2)); 8272 format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %} 8273 ins_encode %{ 8274 int vector_len = 0; 8275 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8276 %} 8277 ins_pipe( pipe_slow ); 8278 %} 8279 8280 instruct vmul2F_mem(vecD dst, vecD src, memory mem) %{ 8281 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8282 match(Set dst (MulVF src (LoadVector mem))); 8283 format %{ "vmulps $dst,$src,$mem\t! mul packed2F" %} 8284 ins_encode %{ 8285 int vector_len = 0; 8286 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8287 %} 8288 ins_pipe( pipe_slow ); 8289 %} 8290 8291 instruct vmul4F(vecX dst, vecX src) %{ 8292 predicate(n->as_Vector()->length() == 4); 8293 match(Set dst (MulVF dst src)); 8294 format %{ "mulps $dst,$src\t! 
mul packed4F" %} 8295 ins_encode %{ 8296 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 8297 %} 8298 ins_pipe( pipe_slow ); 8299 %} 8300 8301 instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{ 8302 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8303 match(Set dst (MulVF src1 src2)); 8304 format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %} 8305 ins_encode %{ 8306 int vector_len = 0; 8307 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8308 %} 8309 ins_pipe( pipe_slow ); 8310 %} 8311 8312 instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{ 8313 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8314 match(Set dst (MulVF src (LoadVector mem))); 8315 format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %} 8316 ins_encode %{ 8317 int vector_len = 0; 8318 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8319 %} 8320 ins_pipe( pipe_slow ); 8321 %} 8322 8323 instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{ 8324 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8325 match(Set dst (MulVF src1 src2)); 8326 format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %} 8327 ins_encode %{ 8328 int vector_len = 1; 8329 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8330 %} 8331 ins_pipe( pipe_slow ); 8332 %} 8333 8334 instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{ 8335 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8336 match(Set dst (MulVF src (LoadVector mem))); 8337 format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %} 8338 ins_encode %{ 8339 int vector_len = 1; 8340 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8341 %} 8342 ins_pipe( pipe_slow ); 8343 %} 8344 8345 instruct vmul16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ 8346 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8347 match(Set dst (MulVF src1 src2)); 8348 format %{ "vmulps $dst,$src1,$src2\t! mul packed16F" %} 8349 ins_encode %{ 8350 int vector_len = 2; 8351 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8352 %} 8353 ins_pipe( pipe_slow ); 8354 %} 8355 8356 instruct vmul16F_mem(vecZ dst, vecZ src, memory mem) %{ 8357 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8358 match(Set dst (MulVF src (LoadVector mem))); 8359 format %{ "vmulps $dst,$src,$mem\t! mul packed16F" %} 8360 ins_encode %{ 8361 int vector_len = 2; 8362 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8363 %} 8364 ins_pipe( pipe_slow ); 8365 %} 8366 8367 // Doubles vector mul 8368 instruct vmul2D(vecX dst, vecX src) %{ 8369 predicate(n->as_Vector()->length() == 2); 8370 match(Set dst (MulVD dst src)); 8371 format %{ "mulpd $dst,$src\t! mul packed2D" %} 8372 ins_encode %{ 8373 __ mulpd($dst$$XMMRegister, $src$$XMMRegister); 8374 %} 8375 ins_pipe( pipe_slow ); 8376 %} 8377 8378 instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{ 8379 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8380 match(Set dst (MulVD src1 src2)); 8381 format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %} 8382 ins_encode %{ 8383 int vector_len = 0; 8384 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8385 %} 8386 ins_pipe( pipe_slow ); 8387 %} 8388 8389 instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{ 8390 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8391 match(Set dst (MulVD src (LoadVector mem))); 8392 format %{ "vmulpd $dst,$src,$mem\t! 
mul packed2D" %} 8393 ins_encode %{ 8394 int vector_len = 0; 8395 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8396 %} 8397 ins_pipe( pipe_slow ); 8398 %} 8399 8400 instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{ 8401 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8402 match(Set dst (MulVD src1 src2)); 8403 format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %} 8404 ins_encode %{ 8405 int vector_len = 1; 8406 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8407 %} 8408 ins_pipe( pipe_slow ); 8409 %} 8410 8411 instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{ 8412 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8413 match(Set dst (MulVD src (LoadVector mem))); 8414 format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %} 8415 ins_encode %{ 8416 int vector_len = 1; 8417 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8418 %} 8419 ins_pipe( pipe_slow ); 8420 %} 8421 8422 instruct vmul8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ 8423 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8424 match(Set dst (MulVD src1 src2)); 8425 format %{ "vmulpd $dst k0,$src1,$src2\t! mul packed8D" %} 8426 ins_encode %{ 8427 int vector_len = 2; 8428 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8429 %} 8430 ins_pipe( pipe_slow ); 8431 %} 8432 8433 instruct vmul8D_mem(vecZ dst, vecZ src, memory mem) %{ 8434 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8435 match(Set dst (MulVD src (LoadVector mem))); 8436 format %{ "vmulpd $dst k0,$src,$mem\t! mul packed8D" %} 8437 ins_encode %{ 8438 int vector_len = 2; 8439 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8440 %} 8441 ins_pipe( pipe_slow ); 8442 %} 8443 8444 instruct vcmov4D_reg(vecY dst, vecY src1, vecY src2, immI8 cop, cmpOp_vcmppd copnd) %{ 8445 predicate(UseAVX > 0 && UseAVX < 3 && n->as_Vector()->length() == 4); 8446 match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2))); 8447 effect(TEMP dst, USE src1, USE src2); 8448 format %{ "cmppd.$copnd $dst, $src1, $src2 ! vcmovevd, cond=$cop\n\t" 8449 "vpblendd $dst,$src1,$src2,$dst ! vcmovevd\n\t" 8450 %} 8451 ins_encode %{ 8452 int vector_len = 1; 8453 int cond = (Assembler::Condition)($copnd$$cmpcode); 8454 __ cmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len); 8455 __ vpblendd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len); 8456 %} 8457 ins_pipe( pipe_slow ); 8458 %} 8459 8460 // --------------------------------- DIV -------------------------------------- 8461 8462 // Floats vector div 8463 instruct vdiv2F(vecD dst, vecD src) %{ 8464 predicate(n->as_Vector()->length() == 2); 8465 match(Set dst (DivVF dst src)); 8466 format %{ "divps $dst,$src\t! div packed2F" %} 8467 ins_encode %{ 8468 __ divps($dst$$XMMRegister, $src$$XMMRegister); 8469 %} 8470 ins_pipe( pipe_slow ); 8471 %} 8472 8473 instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{ 8474 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8475 match(Set dst (DivVF src1 src2)); 8476 format %{ "vdivps $dst,$src1,$src2\t! 
div packed2F" %} 8477 ins_encode %{ 8478 int vector_len = 0; 8479 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8480 %} 8481 ins_pipe( pipe_slow ); 8482 %} 8483 8484 instruct vdiv2F_mem(vecD dst, vecD src, memory mem) %{ 8485 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8486 match(Set dst (DivVF src (LoadVector mem))); 8487 format %{ "vdivps $dst,$src,$mem\t! div packed2F" %} 8488 ins_encode %{ 8489 int vector_len = 0; 8490 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8491 %} 8492 ins_pipe( pipe_slow ); 8493 %} 8494 8495 instruct vdiv4F(vecX dst, vecX src) %{ 8496 predicate(n->as_Vector()->length() == 4); 8497 match(Set dst (DivVF dst src)); 8498 format %{ "divps $dst,$src\t! div packed4F" %} 8499 ins_encode %{ 8500 __ divps($dst$$XMMRegister, $src$$XMMRegister); 8501 %} 8502 ins_pipe( pipe_slow ); 8503 %} 8504 8505 instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{ 8506 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8507 match(Set dst (DivVF src1 src2)); 8508 format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %} 8509 ins_encode %{ 8510 int vector_len = 0; 8511 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8512 %} 8513 ins_pipe( pipe_slow ); 8514 %} 8515 8516 instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{ 8517 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8518 match(Set dst (DivVF src (LoadVector mem))); 8519 format %{ "vdivps $dst,$src,$mem\t! div packed4F" %} 8520 ins_encode %{ 8521 int vector_len = 0; 8522 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8523 %} 8524 ins_pipe( pipe_slow ); 8525 %} 8526 8527 instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{ 8528 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8529 match(Set dst (DivVF src1 src2)); 8530 format %{ "vdivps $dst,$src1,$src2\t! div packed8F" %} 8531 ins_encode %{ 8532 int vector_len = 1; 8533 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8534 %} 8535 ins_pipe( pipe_slow ); 8536 %} 8537 8538 instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{ 8539 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8540 match(Set dst (DivVF src (LoadVector mem))); 8541 format %{ "vdivps $dst,$src,$mem\t! div packed8F" %} 8542 ins_encode %{ 8543 int vector_len = 1; 8544 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8545 %} 8546 ins_pipe( pipe_slow ); 8547 %} 8548 8549 instruct vdiv16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ 8550 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 8551 match(Set dst (DivVF src1 src2)); 8552 format %{ "vdivps $dst,$src1,$src2\t! div packed16F" %} 8553 ins_encode %{ 8554 int vector_len = 2; 8555 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8556 %} 8557 ins_pipe( pipe_slow ); 8558 %} 8559 8560 instruct vdiv16F_mem(vecZ dst, vecZ src, memory mem) %{ 8561 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 8562 match(Set dst (DivVF src (LoadVector mem))); 8563 format %{ "vdivps $dst,$src,$mem\t! div packed16F" %} 8564 ins_encode %{ 8565 int vector_len = 2; 8566 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8567 %} 8568 ins_pipe( pipe_slow ); 8569 %} 8570 8571 // Doubles vector div 8572 instruct vdiv2D(vecX dst, vecX src) %{ 8573 predicate(n->as_Vector()->length() == 2); 8574 match(Set dst (DivVD dst src)); 8575 format %{ "divpd $dst,$src\t! 
div packed2D" %} 8576 ins_encode %{ 8577 __ divpd($dst$$XMMRegister, $src$$XMMRegister); 8578 %} 8579 ins_pipe( pipe_slow ); 8580 %} 8581 8582 instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{ 8583 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8584 match(Set dst (DivVD src1 src2)); 8585 format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %} 8586 ins_encode %{ 8587 int vector_len = 0; 8588 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8589 %} 8590 ins_pipe( pipe_slow ); 8591 %} 8592 8593 instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{ 8594 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8595 match(Set dst (DivVD src (LoadVector mem))); 8596 format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %} 8597 ins_encode %{ 8598 int vector_len = 0; 8599 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8600 %} 8601 ins_pipe( pipe_slow ); 8602 %} 8603 8604 instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{ 8605 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8606 match(Set dst (DivVD src1 src2)); 8607 format %{ "vdivpd $dst,$src1,$src2\t! div packed4D" %} 8608 ins_encode %{ 8609 int vector_len = 1; 8610 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8611 %} 8612 ins_pipe( pipe_slow ); 8613 %} 8614 8615 instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{ 8616 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8617 match(Set dst (DivVD src (LoadVector mem))); 8618 format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %} 8619 ins_encode %{ 8620 int vector_len = 1; 8621 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8622 %} 8623 ins_pipe( pipe_slow ); 8624 %} 8625 8626 instruct vdiv8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ 8627 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8628 match(Set dst (DivVD src1 src2)); 8629 format %{ "vdivpd $dst,$src1,$src2\t! div packed8D" %} 8630 ins_encode %{ 8631 int vector_len = 2; 8632 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8633 %} 8634 ins_pipe( pipe_slow ); 8635 %} 8636 8637 instruct vdiv8D_mem(vecZ dst, vecZ src, memory mem) %{ 8638 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8639 match(Set dst (DivVD src (LoadVector mem))); 8640 format %{ "vdivpd $dst,$src,$mem\t! div packed8D" %} 8641 ins_encode %{ 8642 int vector_len = 2; 8643 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8644 %} 8645 ins_pipe( pipe_slow ); 8646 %} 8647 8648 // ------------------------------ Shift --------------------------------------- 8649 8650 // Left and right shift count vectors are the same on x86 8651 // (only lowest bits of xmm reg are used for count). 8652 instruct vshiftcnt(vecS dst, rRegI cnt) %{ 8653 match(Set dst (LShiftCntV cnt)); 8654 match(Set dst (RShiftCntV cnt)); 8655 format %{ "movd $dst,$cnt\t! load shift count" %} 8656 ins_encode %{ 8657 __ movdl($dst$$XMMRegister, $cnt$$Register); 8658 %} 8659 ins_pipe( pipe_slow ); 8660 %} 8661 8662 // --------------------------------- Sqrt -------------------------------------- 8663 8664 // Floating point vector sqrt - double precision only 8665 instruct vsqrt2D_reg(vecX dst, vecX src) %{ 8666 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8667 match(Set dst (SqrtVD src)); 8668 format %{ "vsqrtpd $dst,$src\t! 
// --------------------------------- Sqrt --------------------------------------

// Floating point vector sqrt - double precision only
instruct vsqrt2D_reg(vecX dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt2D_mem(vecX dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4D_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4D_mem(vecY dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8D_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8D_mem(vecZ dst, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ LeftShift -----------------------------------

// Shorts/Chars vector left shift
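
// Note on the three encodings used for the subword (16-bit) shifts below:
// each operation comes in an _avx form (supports_avx256only: VEX encoding on
// AVX1/AVX2 hardware without AVX-512), an _evex form (supports_avx512bw:
// EVEX encoding, word ops legal on all registers), and an _evex_special form
// (supports_avx512nobw). The _special variants match the destructive
// "dst = dst op shift" shape and mark src as TEMP, presumably because
// without AVX512BW the word shifts must fall back to VEX encodings, which
// cannot reach the EVEX-only registers xmm16-xmm31.
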
instruct vsll2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_avx(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_evex(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_avx(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_evex(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_avx(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_evex(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_avx(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_evex(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector left shift
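
// Unlike the 16-bit shifts above, the dword and qword shifts that follow
// need no avx512bw/_evex_special variants: 32- and 64-bit vector shifts are
// part of base AVX-512 (AVX512F), so the predicates simply step through
// UseAVX > 0 / > 1 / > 2 for the 128-, 256- and 512-bit forms.
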
instruct vsll2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector left shift
instruct vsll2L(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8L_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ----------------------- LogicalRightShift -----------------------------------

// Shorts vector logical right shift produces an incorrect Java result
// for negative data because Java code converts a short value to an int
// with sign extension before shifting. Char vectors are fine, though,
// since chars are unsigned values.
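
// Illustrative example of the mismatch described above (Java semantics):
//
//   short s = -1;                 // 0xFFFF
//   short r = (short)(s >>> 3);   // s widens to 0xFFFFFFFF, so r == (short)0x1FFFFFFF == -1
//
// whereas a packed 16-bit psrlw would compute 0xFFFF >>> 3 == 0x1FFF.
// For char data the widening is a zero extension, so the packed result
// matches: (char)(((char)0xFFFF) >>> 3) == 0x1FFF either way.
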
instruct vsrl2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_avx(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_evex(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_avx(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_evex(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_avx(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_evex(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_avx(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_evex(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector logical right shift
instruct vsrl2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector logical right shift
instruct vsrl2L(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8L_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------- ArithmeticRightShift -----------------------------------

// Shorts/Chars vector arithmetic right shift
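
// Note: below AVX-512 x86 has no packed 64-bit arithmetic right shift
// (vpsraq is an AVX-512 instruction), which presumably is why this section
// provides only short and int forms; psraw/psrad cover 16- and 32-bit lanes.
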
instruct vsra2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_avx(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_evex(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_avx(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_evex(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_avx(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_evex(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_avx(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
instruct vsra16S_reg_avx(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_evex(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
// Integers vector arithmetic right shift
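// Illustrative example (editorial, not part of the original file):
// a Java loop such as
//
//   static void sra3(int[] a) {
//     for (int i = 0; i < a.length; i++) {
//       a[i] >>= 3;    // arithmetic shift: the sign bit is replicated
//     }
//   }
//
// may be auto-vectorized by C2's SuperWord pass into RShiftVI nodes
// with a constant shift count, which the *_imm rules below match;
// the rule chosen depends on UseAVX and the maximum vector length.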
instruct vsra2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// There are no vector arithmetic right shift instructions for longs:
// SSE and AVX2 provide psraw/psrad but no psraq. (AVX-512 adds vpsraq,
// which is not used here.)

// --------------------------------- AND --------------------------------------
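// Illustrative example (editorial, not part of the original file):
// a bitwise-and loop such as
//
//   static void andAll(int[] a, int[] b) {
//     for (int i = 0; i < a.length; i++) {
//       a[i] &= b[i];
//     }
//   }
//
// may be vectorized into AndV nodes. The logic operations are
// element-size agnostic, so the rules below key on length_in_bytes()
// rather than on element count.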
instruct vand4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (4 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
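// Editorial note: the *_mem rules fold the LoadVector directly into
// the instruction's memory operand, saving a separate vector load;
// only the AVX (UseAVX > 0) forms are given a memory variant here,
// since the SSE forms modify dst in place.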
instruct vand8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (8 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (16 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- OR ---------------------------------------
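// The OR rules mirror the AND rules above: por/vpor are bitwise and
// element-size agnostic, with the SSE form modifying dst in place and
// the AVX forms taking explicit register or memory sources.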
instruct vor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (4 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (8 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (16 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- XOR --------------------------------------
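// The XOR rules complete the bitwise-logic trio with the same
// structure: pxor modifies dst in place, while vpxor takes explicit
// register or memory sources at 128-, 256- and 512-bit widths.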
instruct vxor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}