//
// Copyright (c) 2011, 2014, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers.  512-bit registers or 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre-EVEX enabled architectures:
//   XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX enabled architectures:
//   XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
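//
// As an illustrative reading of the format above, the first two
// definitions of XMM0 below are
//
//   reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
//   reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
//
// i.e. the first and second 32-bit slots of xmm0: caller-saved (SOC) in
// both the allocator's view and the C convention, spilled as a float
// (Op_RegF), hardware encoding 0, with next(1) naming the following
// VMReg slot of the same physical register.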
//
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM31 preserved across function calls
//              XMM0-XMM3 might hold parameters

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

#ifdef _WIN64

reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(15));

reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(15));

#else // _WIN64

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

#ifdef _LP64

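// Note: in this non-Windows branch the upper registers are SOC/SOC --
// per the ABI comment above, no XMM register is preserved across calls
// here, so unlike the _WIN64 branch none of them are save-on-entry.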
reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
xmm23->as_VMReg()->next(8)); 924 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9)); 925 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10)); 926 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11)); 927 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12)); 928 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13)); 929 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14)); 930 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15)); 931 932 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()); 933 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1)); 934 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2)); 935 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3)); 936 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4)); 937 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5)); 938 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6)); 939 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7)); 940 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8)); 941 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9)); 942 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10)); 943 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11)); 944 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12)); 945 reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13)); 946 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14)); 947 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15)); 948 949 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()); 950 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1)); 951 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2)); 952 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3)); 953 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4)); 954 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5)); 955 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6)); 956 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7)); 957 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8)); 958 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9)); 959 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10)); 960 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11)); 961 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12)); 962 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13)); 963 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14)); 964 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15)); 965 966 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()); 967 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1)); 968 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2)); 969 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3)); 970 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4)); 971 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5)); 972 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6)); 973 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7)); 974 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8)); 975 reg_def XMM26j( SOC, SOC, Op_RegF, 26, 
xmm26->as_VMReg()->next(9)); 976 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10)); 977 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11)); 978 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12)); 979 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13)); 980 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14)); 981 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15)); 982 983 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()); 984 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1)); 985 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2)); 986 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3)); 987 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4)); 988 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5)); 989 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6)); 990 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7)); 991 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8)); 992 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9)); 993 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10)); 994 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11)); 995 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12)); 996 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13)); 997 reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14)); 998 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15)); 999 1000 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()); 1001 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1)); 1002 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2)); 1003 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3)); 1004 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4)); 1005 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5)); 1006 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6)); 1007 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7)); 1008 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8)); 1009 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9)); 1010 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10)); 1011 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11)); 1012 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12)); 1013 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13)); 1014 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14)); 1015 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15)); 1016 1017 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()); 1018 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1)); 1019 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2)); 1020 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3)); 1021 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4)); 1022 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5)); 1023 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6)); 1024 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7)); 1025 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8)); 1026 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9)); 1027 reg_def XMM29k( SOC, SOC, Op_RegF, 
29, xmm29->as_VMReg()->next(10)); 1028 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11)); 1029 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12)); 1030 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13)); 1031 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14)); 1032 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15)); 1033 1034 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()); 1035 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1)); 1036 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2)); 1037 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3)); 1038 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4)); 1039 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5)); 1040 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6)); 1041 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7)); 1042 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8)); 1043 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9)); 1044 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10)); 1045 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11)); 1046 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12)); 1047 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13)); 1048 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14)); 1049 reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15)); 1050 1051 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()); 1052 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1)); 1053 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2)); 1054 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3)); 1055 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4)); 1056 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5)); 1057 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6)); 1058 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7)); 1059 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8)); 1060 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9)); 1061 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10)); 1062 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11)); 1063 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12)); 1064 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13)); 1065 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14)); 1066 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15)); 1067 1068 #endif // _LP64 1069 1070 #endif // _WIN64 1071 1072 #ifdef _LP64 1073 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad()); 1074 #else 1075 reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad()); 1076 #endif // _LP64 1077 1078 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1079 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1080 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1081 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1082 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, 
XMM4o, XMM4p, 1083 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1084 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1085 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1086 #ifdef _LP64 1087 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1088 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1089 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1090 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1091 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1092 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1093 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1094 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1095 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 1096 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 1097 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 1098 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 1099 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 1100 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 1101 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 1102 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 1103 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 1104 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 1105 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 1106 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 1107 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 1108 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 1109 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1110 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1111 #endif 1112 ); 1113 1114 // flags 
allocation class should be last. 1115 alloc_class chunk2(RFLAGS); 1116 1117 // Singleton class for condition codes 1118 reg_class int_flags(RFLAGS); 1119 1120 // Class for pre evex float registers 1121 reg_class float_reg_legacy(XMM0, 1122 XMM1, 1123 XMM2, 1124 XMM3, 1125 XMM4, 1126 XMM5, 1127 XMM6, 1128 XMM7 1129 #ifdef _LP64 1130 ,XMM8, 1131 XMM9, 1132 XMM10, 1133 XMM11, 1134 XMM12, 1135 XMM13, 1136 XMM14, 1137 XMM15 1138 #endif 1139 ); 1140 1141 // Class for evex float registers 1142 reg_class float_reg_evex(XMM0, 1143 XMM1, 1144 XMM2, 1145 XMM3, 1146 XMM4, 1147 XMM5, 1148 XMM6, 1149 XMM7 1150 #ifdef _LP64 1151 ,XMM8, 1152 XMM9, 1153 XMM10, 1154 XMM11, 1155 XMM12, 1156 XMM13, 1157 XMM14, 1158 XMM15, 1159 XMM16, 1160 XMM17, 1161 XMM18, 1162 XMM19, 1163 XMM20, 1164 XMM21, 1165 XMM22, 1166 XMM23, 1167 XMM24, 1168 XMM25, 1169 XMM26, 1170 XMM27, 1171 XMM28, 1172 XMM29, 1173 XMM30, 1174 XMM31 1175 #endif 1176 ); 1177 1178 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} ); 1179 1180 // Class for pre evex double registers 1181 reg_class double_reg_legacy(XMM0, XMM0b, 1182 XMM1, XMM1b, 1183 XMM2, XMM2b, 1184 XMM3, XMM3b, 1185 XMM4, XMM4b, 1186 XMM5, XMM5b, 1187 XMM6, XMM6b, 1188 XMM7, XMM7b 1189 #ifdef _LP64 1190 ,XMM8, XMM8b, 1191 XMM9, XMM9b, 1192 XMM10, XMM10b, 1193 XMM11, XMM11b, 1194 XMM12, XMM12b, 1195 XMM13, XMM13b, 1196 XMM14, XMM14b, 1197 XMM15, XMM15b 1198 #endif 1199 ); 1200 1201 // Class for evex double registers 1202 reg_class double_reg_evex(XMM0, XMM0b, 1203 XMM1, XMM1b, 1204 XMM2, XMM2b, 1205 XMM3, XMM3b, 1206 XMM4, XMM4b, 1207 XMM5, XMM5b, 1208 XMM6, XMM6b, 1209 XMM7, XMM7b 1210 #ifdef _LP64 1211 ,XMM8, XMM8b, 1212 XMM9, XMM9b, 1213 XMM10, XMM10b, 1214 XMM11, XMM11b, 1215 XMM12, XMM12b, 1216 XMM13, XMM13b, 1217 XMM14, XMM14b, 1218 XMM15, XMM15b, 1219 XMM16, XMM16b, 1220 XMM17, XMM17b, 1221 XMM18, XMM18b, 1222 XMM19, XMM19b, 1223 XMM20, XMM20b, 1224 XMM21, XMM21b, 1225 XMM22, XMM22b, 1226 XMM23, XMM23b, 1227 XMM24, XMM24b, 1228 XMM25, XMM25b, 1229 XMM26, XMM26b, 1230 XMM27, XMM27b, 1231 XMM28, XMM28b, 1232 XMM29, XMM29b, 1233 XMM30, XMM30b, 1234 XMM31, XMM31b 1235 #endif 1236 ); 1237 1238 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} ); 1239 1240 // Class for pre evex 32bit vector registers 1241 reg_class vectors_reg_legacy(XMM0, 1242 XMM1, 1243 XMM2, 1244 XMM3, 1245 XMM4, 1246 XMM5, 1247 XMM6, 1248 XMM7 1249 #ifdef _LP64 1250 ,XMM8, 1251 XMM9, 1252 XMM10, 1253 XMM11, 1254 XMM12, 1255 XMM13, 1256 XMM14, 1257 XMM15 1258 #endif 1259 ); 1260 1261 // Class for evex 32bit vector registers 1262 reg_class vectors_reg_evex(XMM0, 1263 XMM1, 1264 XMM2, 1265 XMM3, 1266 XMM4, 1267 XMM5, 1268 XMM6, 1269 XMM7 1270 #ifdef _LP64 1271 ,XMM8, 1272 XMM9, 1273 XMM10, 1274 XMM11, 1275 XMM12, 1276 XMM13, 1277 XMM14, 1278 XMM15, 1279 XMM16, 1280 XMM17, 1281 XMM18, 1282 XMM19, 1283 XMM20, 1284 XMM21, 1285 XMM22, 1286 XMM23, 1287 XMM24, 1288 XMM25, 1289 XMM26, 1290 XMM27, 1291 XMM28, 1292 XMM29, 1293 XMM30, 1294 XMM31 1295 #endif 1296 ); 1297 1298 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} ); 1299 1300 // Class for all 64bit vector registers 1301 reg_class vectord_reg_legacy(XMM0, XMM0b, 1302 XMM1, XMM1b, 1303 XMM2, XMM2b, 1304 XMM3, XMM3b, 1305 XMM4, XMM4b, 1306 XMM5, XMM5b, 1307 XMM6, XMM6b, 1308 XMM7, XMM7b 1309 #ifdef _LP64 1310 ,XMM8, XMM8b, 1311 XMM9, XMM9b, 1312 XMM10, XMM10b, 1313 XMM11, XMM11b, 1314 XMM12, XMM12b, 1315 XMM13, XMM13b, 1316 
XMM14, XMM14b, 1317 XMM15, XMM15b 1318 #endif 1319 ); 1320 1321 // Class for all 64bit vector registers 1322 reg_class vectord_reg_evex(XMM0, XMM0b, 1323 XMM1, XMM1b, 1324 XMM2, XMM2b, 1325 XMM3, XMM3b, 1326 XMM4, XMM4b, 1327 XMM5, XMM5b, 1328 XMM6, XMM6b, 1329 XMM7, XMM7b 1330 #ifdef _LP64 1331 ,XMM8, XMM8b, 1332 XMM9, XMM9b, 1333 XMM10, XMM10b, 1334 XMM11, XMM11b, 1335 XMM12, XMM12b, 1336 XMM13, XMM13b, 1337 XMM14, XMM14b, 1338 XMM15, XMM15b, 1339 XMM16, XMM16b, 1340 XMM17, XMM17b, 1341 XMM18, XMM18b, 1342 XMM19, XMM19b, 1343 XMM20, XMM20b, 1344 XMM21, XMM21b, 1345 XMM22, XMM22b, 1346 XMM23, XMM23b, 1347 XMM24, XMM24b, 1348 XMM25, XMM25b, 1349 XMM26, XMM26b, 1350 XMM27, XMM27b, 1351 XMM28, XMM28b, 1352 XMM29, XMM29b, 1353 XMM30, XMM30b, 1354 XMM31, XMM31b 1355 #endif 1356 ); 1357 1358 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} ); 1359 1360 // Class for all 128bit vector registers 1361 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, 1362 XMM1, XMM1b, XMM1c, XMM1d, 1363 XMM2, XMM2b, XMM2c, XMM2d, 1364 XMM3, XMM3b, XMM3c, XMM3d, 1365 XMM4, XMM4b, XMM4c, XMM4d, 1366 XMM5, XMM5b, XMM5c, XMM5d, 1367 XMM6, XMM6b, XMM6c, XMM6d, 1368 XMM7, XMM7b, XMM7c, XMM7d 1369 #ifdef _LP64 1370 ,XMM8, XMM8b, XMM8c, XMM8d, 1371 XMM9, XMM9b, XMM9c, XMM9d, 1372 XMM10, XMM10b, XMM10c, XMM10d, 1373 XMM11, XMM11b, XMM11c, XMM11d, 1374 XMM12, XMM12b, XMM12c, XMM12d, 1375 XMM13, XMM13b, XMM13c, XMM13d, 1376 XMM14, XMM14b, XMM14c, XMM14d, 1377 XMM15, XMM15b, XMM15c, XMM15d 1378 #endif 1379 ); 1380 1381 // Class for all 128bit vector registers 1382 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, 1383 XMM1, XMM1b, XMM1c, XMM1d, 1384 XMM2, XMM2b, XMM2c, XMM2d, 1385 XMM3, XMM3b, XMM3c, XMM3d, 1386 XMM4, XMM4b, XMM4c, XMM4d, 1387 XMM5, XMM5b, XMM5c, XMM5d, 1388 XMM6, XMM6b, XMM6c, XMM6d, 1389 XMM7, XMM7b, XMM7c, XMM7d 1390 #ifdef _LP64 1391 ,XMM8, XMM8b, XMM8c, XMM8d, 1392 XMM9, XMM9b, XMM9c, XMM9d, 1393 XMM10, XMM10b, XMM10c, XMM10d, 1394 XMM11, XMM11b, XMM11c, XMM11d, 1395 XMM12, XMM12b, XMM12c, XMM12d, 1396 XMM13, XMM13b, XMM13c, XMM13d, 1397 XMM14, XMM14b, XMM14c, XMM14d, 1398 XMM15, XMM15b, XMM15c, XMM15d, 1399 XMM16, XMM16b, XMM16c, XMM16d, 1400 XMM17, XMM17b, XMM17c, XMM17d, 1401 XMM18, XMM18b, XMM18c, XMM18d, 1402 XMM19, XMM19b, XMM19c, XMM19d, 1403 XMM20, XMM20b, XMM20c, XMM20d, 1404 XMM21, XMM21b, XMM21c, XMM21d, 1405 XMM22, XMM22b, XMM22c, XMM22d, 1406 XMM23, XMM23b, XMM23c, XMM23d, 1407 XMM24, XMM24b, XMM24c, XMM24d, 1408 XMM25, XMM25b, XMM25c, XMM25d, 1409 XMM26, XMM26b, XMM26c, XMM26d, 1410 XMM27, XMM27b, XMM27c, XMM27d, 1411 XMM28, XMM28b, XMM28c, XMM28d, 1412 XMM29, XMM29b, XMM29c, XMM29d, 1413 XMM30, XMM30b, XMM30c, XMM30d, 1414 XMM31, XMM31b, XMM31c, XMM31d 1415 #endif 1416 ); 1417 1418 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} ); 1419 1420 // Class for all 256bit vector registers 1421 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1422 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1423 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1424 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 1425 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1426 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1427 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1428 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1429 #ifdef _LP64 1430 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1431 XMM9, XMM9b, 
XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1432 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1433 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1434 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1435 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1436 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1437 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h 1438 #endif 1439 ); 1440 1441 // Class for all 256bit vector registers 1442 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1443 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1444 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1445 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 1446 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1447 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1448 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1449 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1450 #ifdef _LP64 1451 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1452 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1453 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1454 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1455 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1456 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1457 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1458 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, 1459 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, 1460 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, 1461 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, 1462 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, 1463 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, 1464 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, 1465 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, 1466 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, 1467 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, 1468 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, 1469 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, 1470 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, 1471 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, 1472 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, 1473 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, 1474 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h 1475 #endif 1476 ); 1477 1478 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} ); 1479 1480 // Class for all 512bit vector registers 1481 reg_class vectorz_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1482 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1483 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1484 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1485 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1486 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, 
XMM5o, XMM5p, 1487 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1488 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1489 #ifdef _LP64 1490 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1491 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1492 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1493 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1494 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1495 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1496 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1497 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1498 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 1499 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 1500 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 1501 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 1502 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 1503 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 1504 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 1505 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 1506 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 1507 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 1508 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 1509 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 1510 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 1511 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 1512 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1513 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1514 #endif 1515 ); 1516 1517 %} 1518 1519 1520 //----------SOURCE BLOCK------------------------------------------------------- 1521 // This is a block of 
C++ code which provides values, functions, and 1522 // definitions necessary in the rest of the architecture description 1523 1524 source_hpp %{ 1525 // Header information of the source block. 1526 // Method declarations/definitions which are used outside 1527 // the ad-scope can conveniently be defined here. 1528 // 1529 // To keep related declarations/definitions/uses close together, 1530 // we switch between source %{ }% and source_hpp %{ }% freely as needed. 1531 1532 class NativeJump; 1533 1534 class CallStubImpl { 1535 1536 //-------------------------------------------------------------- 1537 //---< Used for optimization in Compile::shorten_branches >--- 1538 //-------------------------------------------------------------- 1539 1540 public: 1541 // Size of call trampoline stub. 1542 static uint size_call_trampoline() { 1543 return 0; // no call trampolines on this platform 1544 } 1545 1546 // number of relocations needed by a call trampoline stub 1547 static uint reloc_call_trampoline() { 1548 return 0; // no call trampolines on this platform 1549 } 1550 }; 1551 1552 class HandlerImpl { 1553 1554 public: 1555 1556 static int emit_exception_handler(CodeBuffer &cbuf); 1557 static int emit_deopt_handler(CodeBuffer& cbuf); 1558 1559 static uint size_exception_handler() { 1560 // NativeCall instruction size is the same as NativeJump. 1561 // The exception handler starts out as a jump and can be patched to 1562 // a call by deoptimization. (4932387) 1563 // Note that this value is also credited (in output.cpp) to 1564 // the size of the code section. 1565 return NativeJump::instruction_size; 1566 } 1567 1568 #ifdef _LP64 1569 static uint size_deopt_handler() { 1570 // three 5 byte instructions 1571 return 15; 1572 } 1573 #else 1574 static uint size_deopt_handler() { 1575 // NativeCall instruction size is the same as NativeJump. 1576 // The exception handler starts out as a jump and can be patched to 1577 // a call by deoptimization. (4932387) 1578 // Note that this value is also credited (in output.cpp) to 1579 // the size of the code section. 1580 return 5 + NativeJump::instruction_size; // pushl(); jmp; 1581 } 1582 #endif 1583 }; 1584 1585 %} // end source_hpp 1586 1587 source %{ 1588 1589 // Emit exception handler code. 1590 // Stuff framesize into a register and call a VM stub routine. 1591 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { 1592 1593 // Note that the code buffer's insts_mark is always relative to insts. 1594 // That's why we must use the macroassembler to generate a handler. 1595 MacroAssembler _masm(&cbuf); 1596 address base = __ start_a_stub(size_exception_handler()); 1597 if (base == NULL) { 1598 ciEnv::current()->record_failure("CodeCache is full"); 1599 return 0; // CodeBuffer::expand failed 1600 } 1601 int offset = __ offset(); 1602 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); 1603 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); 1604 __ end_a_stub(); 1605 return offset; 1606 } 1607 1608 // Emit deopt handler code. 1609 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { 1610 1611 // Note that the code buffer's insts_mark is always relative to insts. 1612 // That's why we must use the macroassembler to generate a handler.
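// The handler materializes the current pc on the stack (via call/bind/subptr on 64-bit, pushptr on 32-bit) and then jumps to the deopt blob's unpack entry; see the emitted sequence below.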
1613 MacroAssembler _masm(&cbuf); 1614 address base = __ start_a_stub(size_deopt_handler()); 1615 if (base == NULL) { 1616 ciEnv::current()->record_failure("CodeCache is full"); 1617 return 0; // CodeBuffer::expand failed 1618 } 1619 int offset = __ offset(); 1620 1621 #ifdef _LP64 1622 address the_pc = (address) __ pc(); 1623 Label next; 1624 // push "the_pc" onto the stack without destroying any registers, 1625 // as they may all be live. 1626 1627 // push address of "next" 1628 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32 1629 __ bind(next); 1630 // adjust it so it matches "the_pc" 1631 __ subptr(Address(rsp, 0), __ offset() - offset); 1632 #else 1633 InternalAddress here(__ pc()); 1634 __ pushptr(here.addr()); 1635 #endif 1636 1637 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); 1638 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); 1639 __ end_a_stub(); 1640 return offset; 1641 } 1642 1643 1644 //============================================================================= 1645 1646 // Float masks come from different places depending on platform. 1647 #ifdef _LP64 1648 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 1649 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 1650 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 1651 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 1652 #else 1653 static address float_signmask() { return (address)float_signmask_pool; } 1654 static address float_signflip() { return (address)float_signflip_pool; } 1655 static address double_signmask() { return (address)double_signmask_pool; } 1656 static address double_signflip() { return (address)double_signflip_pool; } 1657 #endif 1658 1659 1660 const bool Matcher::match_rule_supported(int opcode) { 1661 if (!has_match_rule(opcode)) 1662 return false; 1663 1664 bool ret_value = true; 1665 switch (opcode) { 1666 case Op_PopCountI: 1667 case Op_PopCountL: 1668 if (!UsePopCountInstruction) 1669 ret_value = false; 1670 break; 1671 case Op_MulVI: 1672 if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX 1673 ret_value = false; 1674 break; 1675 case Op_MulVL: 1676 case Op_MulReductionVL: 1677 if (VM_Version::supports_avx512dq() == false) 1678 ret_value = false; 1679 break; 1680 case Op_AddReductionVL: 1681 if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here 1682 ret_value = false; 1683 break; 1684 case Op_AddReductionVI: 1685 if (UseSSE < 3) // requires at least SSE3 1686 ret_value = false; 1687 break; 1688 case Op_MulReductionVI: 1689 if (UseSSE < 4) // requires at least SSE4 1690 ret_value = false; 1691 break; 1692 case Op_AddReductionVF: 1693 case Op_AddReductionVD: 1694 case Op_MulReductionVF: 1695 case Op_MulReductionVD: 1696 if (UseSSE < 1) // requires at least SSE 1697 ret_value = false; 1698 break; 1699 case Op_SqrtVD: 1700 if (UseAVX < 1) // enabled for AVX only 1701 ret_value = false; 1702 break; 1703 case Op_CompareAndSwapL: 1704 #ifdef _LP64 1705 case Op_CompareAndSwapP: 1706 #endif 1707 if (!VM_Version::supports_cx8()) 1708 ret_value = false; 1709 break; 1710 case Op_CMoveVD: 1711 if (UseAVX < 1 || UseAVX > 2) 1712 ret_value = false; 1713 break; 1714 case Op_StrIndexOf: 1715 if (!UseSSE42Intrinsics) 1716 ret_value = false; 1717 break; 1718 case Op_StrIndexOfChar: 1719 if (!(UseSSE > 4)) 1720 ret_value = false; 1721 break; 1722 } 1723 1724 return ret_value; // By default, match
rules are supported. 1725 } 1726 1727 const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { 1728 // identify extra cases that we might want to provide match rules for 1729 // e.g. Op_* vector nodes and other intrinsics while guarding with vlen 1730 bool ret_value = match_rule_supported(opcode); 1731 if (ret_value) { 1732 switch (opcode) { 1733 case Op_AddVB: 1734 case Op_SubVB: 1735 if ((vlen == 64) && (VM_Version::supports_avx512bw() == false)) 1736 ret_value = false; 1737 break; 1738 case Op_URShiftVS: 1739 case Op_RShiftVS: 1740 case Op_LShiftVS: 1741 case Op_MulVS: 1742 case Op_AddVS: 1743 case Op_SubVS: 1744 if ((vlen == 32) && (VM_Version::supports_avx512bw() == false)) 1745 ret_value = false; 1746 break; 1747 case Op_CMoveVD: 1748 if (vlen != 4) 1749 ret_value = false; 1750 break; 1751 } 1752 } 1753 1754 return ret_value; // By default, match rules are supported. 1755 } 1756 1757 const int Matcher::float_pressure(int default_pressure_threshold) { 1758 int float_pressure_threshold = default_pressure_threshold; 1759 #ifdef _LP64 1760 if (UseAVX > 2) { 1761 // Increase pressure threshold on machines with AVX3 which have 1762 // 2x more XMM registers. 1763 float_pressure_threshold = default_pressure_threshold * 2; 1764 } 1765 #endif 1766 return float_pressure_threshold; 1767 } 1768 1769 // Max vector size in bytes. 0 if not supported. 1770 const int Matcher::vector_width_in_bytes(BasicType bt) { 1771 assert(is_java_primitive(bt), "only primitive type vectors"); 1772 if (UseSSE < 2) return 0; 1773 // SSE2 supports 128bit vectors for all types. 1774 // AVX2 supports 256bit vectors for all types. 1775 // AVX512/EVEX supports 512bit vectors for all types. 1776 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16; 1777 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. 1778 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) 1779 size = (UseAVX > 2) ? 64 : 32; 1780 // Use flag to limit vector size. 1781 size = MIN2(size,(int)MaxVectorSize); 1782 // Minimum 2 values in vector (or 4 for bytes). 1783 switch (bt) { 1784 case T_DOUBLE: 1785 case T_LONG: 1786 if (size < 16) return 0; 1787 break; 1788 case T_FLOAT: 1789 case T_INT: 1790 if (size < 8) return 0; 1791 break; 1792 case T_BOOLEAN: 1793 if (size < 4) return 0; 1794 break; 1795 case T_CHAR: 1796 if (size < 4) return 0; 1797 break; 1798 case T_BYTE: 1799 if (size < 4) return 0; 1800 break; 1801 case T_SHORT: 1802 if (size < 4) return 0; 1803 break; 1804 default: 1805 ShouldNotReachHere(); 1806 } 1807 return size; 1808 } 1809 1810 // Limits on vector size (number of elements) loaded into vector. 1811 const int Matcher::max_vector_size(const BasicType bt) { 1812 return vector_width_in_bytes(bt)/type2aelembytes(bt); 1813 } 1814 const int Matcher::min_vector_size(const BasicType bt) { 1815 int max_size = max_vector_size(bt); 1816 // Min size which can be loaded into vector is 4 bytes. 1817 int size = (type2aelembytes(bt) == 1) ? 4 : 2; 1818 return MIN2(size,max_size); 1819 } 1820 1821 // Vector ideal reg corresponding to specified size in bytes 1822 const int Matcher::vector_ideal_reg(int size) { 1823 assert(MaxVectorSize >= size, ""); 1824 switch(size) { 1825 case 4: return Op_VecS; 1826 case 8: return Op_VecD; 1827 case 16: return Op_VecX; 1828 case 32: return Op_VecY; 1829 case 64: return Op_VecZ; 1830 } 1831 ShouldNotReachHere(); 1832 return 0; 1833 } 1834 1835 // Only lowest bits of xmm reg are used for vector shift count.
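// A 32-bit VecS therefore always suffices for the count, regardless of the width of the vector being shifted.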
1836 const int Matcher::vector_shift_count_ideal_reg(int size) { 1837 return Op_VecS; 1838 } 1839 1840 // x86 supports misaligned vector store/load. 1841 const bool Matcher::misaligned_vectors_ok() { 1842 return !AlignVector; // can be changed by flag 1843 } 1844 1845 // x86 AES instructions are compatible with SunJCE expanded 1846 // keys, hence we do not need to pass the original key to stubs 1847 const bool Matcher::pass_original_key_for_aes() { 1848 return false; 1849 } 1850 1851 // Helper methods for MachSpillCopyNode::implementation(). 1852 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 1853 int src_hi, int dst_hi, uint ireg, outputStream* st) { 1854 // In 64-bit VM size calculation is very complex. Emitting instructions 1855 // into a scratch buffer is used to get the size in 64-bit VM. 1856 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 1857 assert(ireg == Op_VecS || // 32bit vector 1858 (src_lo & 1) == 0 && (src_lo + 1) == src_hi && 1859 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi, 1860 "no non-adjacent vector moves" ); 1861 if (cbuf) { 1862 MacroAssembler _masm(cbuf); 1863 int offset = __ offset(); 1864 switch (ireg) { 1865 case Op_VecS: // copy whole register 1866 case Op_VecD: 1867 case Op_VecX: 1868 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1869 break; 1870 case Op_VecY: 1871 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1872 break; 1873 case Op_VecZ: 1874 __ evmovdqul(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2); 1875 break; 1876 default: 1877 ShouldNotReachHere(); 1878 } 1879 int size = __ offset() - offset; 1880 #ifdef ASSERT 1881 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 1882 assert(!do_size || size == 4, "incorrect size calculation"); 1883 #endif 1884 return size; 1885 #ifndef PRODUCT 1886 } else if (!do_size) { 1887 switch (ireg) { 1888 case Op_VecS: 1889 case Op_VecD: 1890 case Op_VecX: 1891 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 1892 break; 1893 case Op_VecY: 1894 case Op_VecZ: 1895 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 1896 break; 1897 default: 1898 ShouldNotReachHere(); 1899 } 1900 #endif 1901 } 1902 // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix. 1903 return (UseAVX > 2) ? 6 : 4; 1904 } 1905 1906 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, 1907 int stack_offset, int reg, uint ireg, outputStream* st) { 1908 // In 64-bit VM size calculation is very complex. Emitting instructions 1909 // into a scratch buffer is used to get the size in 64-bit VM.
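// The move instruction is chosen by ideal register: movdl (VecS), movq (VecD), movdqu (VecX), vmovdqu (VecY) or evmovdqul (VecZ), with mirrored forms for loads and stores.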
1910 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 1911 if (cbuf) { 1912 MacroAssembler _masm(cbuf); 1913 int offset = __ offset(); 1914 if (is_load) { 1915 switch (ireg) { 1916 case Op_VecS: 1917 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1918 break; 1919 case Op_VecD: 1920 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1921 break; 1922 case Op_VecX: 1923 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1924 break; 1925 case Op_VecY: 1926 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1927 break; 1928 case Op_VecZ: 1929 __ evmovdqul(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); 1930 break; 1931 default: 1932 ShouldNotReachHere(); 1933 } 1934 } else { // store 1935 switch (ireg) { 1936 case Op_VecS: 1937 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1938 break; 1939 case Op_VecD: 1940 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1941 break; 1942 case Op_VecX: 1943 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1944 break; 1945 case Op_VecY: 1946 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1947 break; 1948 case Op_VecZ: 1949 __ evmovdqul(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); 1950 break; 1951 default: 1952 ShouldNotReachHere(); 1953 } 1954 } 1955 int size = __ offset() - offset; 1956 #ifdef ASSERT 1957 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4); 1958 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 
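// Expected displacement size for the assert below: none for a zero offset, a one-byte disp8 for offsets below 0x80, otherwise four bytes for a disp32 (six when UseAVX > 2, presumably covering the longer EVEX form).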
1959 assert(!do_size || size == (5+offset_size), "incorrect size calculation"); 1960 #endif 1961 return size; 1962 #ifndef PRODUCT 1963 } else if (!do_size) { 1964 if (is_load) { 1965 switch (ireg) { 1966 case Op_VecS: 1967 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1968 break; 1969 case Op_VecD: 1970 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1971 break; 1972 case Op_VecX: 1973 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1974 break; 1975 case Op_VecY: 1976 case Op_VecZ: 1977 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1978 break; 1979 default: 1980 ShouldNotReachHere(); 1981 } 1982 } else { // store 1983 switch (ireg) { 1984 case Op_VecS: 1985 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1986 break; 1987 case Op_VecD: 1988 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1989 break; 1990 case Op_VecX: 1991 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1992 break; 1993 case Op_VecY: 1994 case Op_VecZ: 1995 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1996 break; 1997 default: 1998 ShouldNotReachHere(); 1999 } 2000 } 2001 #endif 2002 } 2003 bool is_single_byte = false; 2004 int vec_len = 0; 2005 if ((UseAVX > 2) && (stack_offset != 0)) { 2006 int tuple_type = Assembler::EVEX_FVM; 2007 int input_size = Assembler::EVEX_32bit; 2008 switch (ireg) { 2009 case Op_VecS: 2010 tuple_type = Assembler::EVEX_T1S; 2011 break; 2012 case Op_VecD: 2013 tuple_type = Assembler::EVEX_T1S; 2014 input_size = Assembler::EVEX_64bit; 2015 break; 2016 case Op_VecX: 2017 break; 2018 case Op_VecY: 2019 vec_len = 1; 2020 break; 2021 case Op_VecZ: 2022 vec_len = 2; 2023 break; 2024 } 2025 is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0); 2026 } 2027 int offset_size = 0; 2028 int size = 5; 2029 if (UseAVX > 2) { 2030 if (VM_Version::supports_avx512novl() && (vec_len == 2)) { 2031 offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); 2032 size += 2; // Need an additional two bytes for EVEX encoding 2033 } else if (VM_Version::supports_avx512novl() && (vec_len < 2)) { 2034 offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4); 2035 } else { 2036 offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); 2037 size += 2; // Need an additional two bytes for EVEX encoding 2038 } 2039 } else { 2040 offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4); 2041 } 2042 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 2043 return size+offset_size; 2044 } 2045 2046 static inline jfloat replicate4_imm(int con, int width) { 2047 // Load a constant of "width" (in bytes) and replicate it to fill 32bit. 2048 assert(width == 1 || width == 2, "only byte or short types here"); 2049 int bit_width = width * 8; 2050 jint val = con; 2051 val &= (1 << bit_width) - 1; // mask off sign bits 2052 while(bit_width < 32) { 2053 val |= (val << bit_width); 2054 bit_width <<= 1; 2055 } 2056 jfloat fval = *((jfloat*) &val); // coerce to float type 2057 return fval; 2058 } 2059 2060 static inline jdouble replicate8_imm(int con, int width) { 2061 // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
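// For example, replicate8_imm(0x1234, 2) produces 0x1234123412341234, reinterpreted below as a jdouble bit pattern.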
2062 assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here"); 2063 int bit_width = width * 8; 2064 jlong val = con; 2065 val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits 2066 while(bit_width < 64) { 2067 val |= (val << bit_width); 2068 bit_width <<= 1; 2069 } 2070 jdouble dval = *((jdouble*) &val); // coerce to double type 2071 return dval; 2072 } 2073 2074 #ifndef PRODUCT 2075 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 2076 st->print("nop \t# %d bytes pad for loops and calls", _count); 2077 } 2078 #endif 2079 2080 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { 2081 MacroAssembler _masm(&cbuf); 2082 __ nop(_count); 2083 } 2084 2085 uint MachNopNode::size(PhaseRegAlloc*) const { 2086 return _count; 2087 } 2088 2089 #ifndef PRODUCT 2090 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 2091 st->print("# breakpoint"); 2092 } 2093 #endif 2094 2095 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { 2096 MacroAssembler _masm(&cbuf); 2097 __ int3(); 2098 } 2099 2100 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 2101 return MachNode::size(ra_); 2102 } 2103 2104 %} 2105 2106 encode %{ 2107 2108 enc_class call_epilog %{ 2109 if (VerifyStackAtCalls) { 2110 // Check that stack depth is unchanged: find the magic cookie on the stack 2111 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 2112 MacroAssembler _masm(&cbuf); 2113 Label L; 2114 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); 2115 __ jccb(Assembler::equal, L); 2116 // Die if stack mismatch 2117 __ int3(); 2118 __ bind(L); 2119 } 2120 %} 2121 2122 %} 2123 2124 2125 //----------OPERANDS----------------------------------------------------------- 2126 // Operand definitions must precede instruction definitions for correct parsing 2127 // in the ADLC because operands constitute user-defined types which are used in 2128 // instruction definitions. 2129 2130 // This operand applies generically only to EVEX, so only one version is needed 2131 operand vecZ() %{ 2132 constraint(ALLOC_IN_RC(vectorz_reg)); 2133 match(VecZ); 2134 2135 format %{ %} 2136 interface(REG_INTER); 2137 %} 2138 2139 // Comparison Code for FP conditional move 2140 operand cmpOp_vcmppd() %{ 2141 match(Bool); 2142 2143 predicate(n->as_Bool()->_test._test != BoolTest::overflow && 2144 n->as_Bool()->_test._test != BoolTest::no_overflow); 2145 format %{ "" %} 2146 interface(COND_INTER) %{ 2147 equal (0x0, "eq"); 2148 less (0x1, "lt"); 2149 less_equal (0x2, "le"); 2150 not_equal (0xC, "ne"); 2151 greater_equal(0xD, "ge"); 2152 greater (0xE, "gt"); 2153 // TODO: adlc cannot compile without the next two lines; it fails with: 2154 // x86_64.ad(13987) Syntax Error: :In operand cmpOp_vcmppd: Do not support this encode constant: ' %{ 2155 // equal' for overflow.
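// Note that the predicate above already rejects overflow/no_overflow tests, so these two encodings can never actually be selected.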
2156 overflow (0x20, "o"); // not really supported by the instruction 2157 no_overflow (0x21, "no"); // not really supported by the instruction 2158 %} 2159 %} 2160 2161 2162 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 2163 2164 // ============================================================================ 2165 2166 instruct ShouldNotReachHere() %{ 2167 match(Halt); 2168 format %{ "int3\t# ShouldNotReachHere" %} 2169 ins_encode %{ 2170 __ int3(); 2171 %} 2172 ins_pipe(pipe_slow); 2173 %} 2174 2175 // ============================================================================ 2176 2177 instruct addF_reg(regF dst, regF src) %{ 2178 predicate((UseSSE>=1) && (UseAVX == 0)); 2179 match(Set dst (AddF dst src)); 2180 2181 format %{ "addss $dst, $src" %} 2182 ins_cost(150); 2183 ins_encode %{ 2184 __ addss($dst$$XMMRegister, $src$$XMMRegister); 2185 %} 2186 ins_pipe(pipe_slow); 2187 %} 2188 2189 instruct addF_mem(regF dst, memory src) %{ 2190 predicate((UseSSE>=1) && (UseAVX == 0)); 2191 match(Set dst (AddF dst (LoadF src))); 2192 2193 format %{ "addss $dst, $src" %} 2194 ins_cost(150); 2195 ins_encode %{ 2196 __ addss($dst$$XMMRegister, $src$$Address); 2197 %} 2198 ins_pipe(pipe_slow); 2199 %} 2200 2201 instruct addF_imm(regF dst, immF con) %{ 2202 predicate((UseSSE>=1) && (UseAVX == 0)); 2203 match(Set dst (AddF dst con)); 2204 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2205 ins_cost(150); 2206 ins_encode %{ 2207 __ addss($dst$$XMMRegister, $constantaddress($con)); 2208 %} 2209 ins_pipe(pipe_slow); 2210 %} 2211 2212 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 2213 predicate(UseAVX > 0); 2214 match(Set dst (AddF src1 src2)); 2215 2216 format %{ "vaddss $dst, $src1, $src2" %} 2217 ins_cost(150); 2218 ins_encode %{ 2219 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2220 %} 2221 ins_pipe(pipe_slow); 2222 %} 2223 2224 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 2225 predicate(UseAVX > 0); 2226 match(Set dst (AddF src1 (LoadF src2))); 2227 2228 format %{ "vaddss $dst, $src1, $src2" %} 2229 ins_cost(150); 2230 ins_encode %{ 2231 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2232 %} 2233 ins_pipe(pipe_slow); 2234 %} 2235 2236 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 2237 predicate(UseAVX > 0); 2238 match(Set dst (AddF src con)); 2239 2240 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2241 ins_cost(150); 2242 ins_encode %{ 2243 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2244 %} 2245 ins_pipe(pipe_slow); 2246 %} 2247 2248 instruct addD_reg(regD dst, regD src) %{ 2249 predicate((UseSSE>=2) && (UseAVX == 0)); 2250 match(Set dst (AddD dst src)); 2251 2252 format %{ "addsd $dst, $src" %} 2253 ins_cost(150); 2254 ins_encode %{ 2255 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 2256 %} 2257 ins_pipe(pipe_slow); 2258 %} 2259 2260 instruct addD_mem(regD dst, memory src) %{ 2261 predicate((UseSSE>=2) && (UseAVX == 0)); 2262 match(Set dst (AddD dst (LoadD src))); 2263 2264 format %{ "addsd $dst, $src" %} 2265 ins_cost(150); 2266 ins_encode %{ 2267 __ addsd($dst$$XMMRegister, $src$$Address); 2268 %} 2269 ins_pipe(pipe_slow); 2270 %} 2271 2272 instruct addD_imm(regD dst, immD con) %{ 2273 predicate((UseSSE>=2) && (UseAVX == 0)); 2274 match(Set dst (AddD dst con)); 2275 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2276 ins_cost(150); 
2277 ins_encode %{ 2278 __ addsd($dst$$XMMRegister, $constantaddress($con)); 2279 %} 2280 ins_pipe(pipe_slow); 2281 %} 2282 2283 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 2284 predicate(UseAVX > 0); 2285 match(Set dst (AddD src1 src2)); 2286 2287 format %{ "vaddsd $dst, $src1, $src2" %} 2288 ins_cost(150); 2289 ins_encode %{ 2290 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2291 %} 2292 ins_pipe(pipe_slow); 2293 %} 2294 2295 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 2296 predicate(UseAVX > 0); 2297 match(Set dst (AddD src1 (LoadD src2))); 2298 2299 format %{ "vaddsd $dst, $src1, $src2" %} 2300 ins_cost(150); 2301 ins_encode %{ 2302 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2303 %} 2304 ins_pipe(pipe_slow); 2305 %} 2306 2307 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 2308 predicate(UseAVX > 0); 2309 match(Set dst (AddD src con)); 2310 2311 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2312 ins_cost(150); 2313 ins_encode %{ 2314 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2315 %} 2316 ins_pipe(pipe_slow); 2317 %} 2318 2319 instruct subF_reg(regF dst, regF src) %{ 2320 predicate((UseSSE>=1) && (UseAVX == 0)); 2321 match(Set dst (SubF dst src)); 2322 2323 format %{ "subss $dst, $src" %} 2324 ins_cost(150); 2325 ins_encode %{ 2326 __ subss($dst$$XMMRegister, $src$$XMMRegister); 2327 %} 2328 ins_pipe(pipe_slow); 2329 %} 2330 2331 instruct subF_mem(regF dst, memory src) %{ 2332 predicate((UseSSE>=1) && (UseAVX == 0)); 2333 match(Set dst (SubF dst (LoadF src))); 2334 2335 format %{ "subss $dst, $src" %} 2336 ins_cost(150); 2337 ins_encode %{ 2338 __ subss($dst$$XMMRegister, $src$$Address); 2339 %} 2340 ins_pipe(pipe_slow); 2341 %} 2342 2343 instruct subF_imm(regF dst, immF con) %{ 2344 predicate((UseSSE>=1) && (UseAVX == 0)); 2345 match(Set dst (SubF dst con)); 2346 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2347 ins_cost(150); 2348 ins_encode %{ 2349 __ subss($dst$$XMMRegister, $constantaddress($con)); 2350 %} 2351 ins_pipe(pipe_slow); 2352 %} 2353 2354 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 2355 predicate(UseAVX > 0); 2356 match(Set dst (SubF src1 src2)); 2357 2358 format %{ "vsubss $dst, $src1, $src2" %} 2359 ins_cost(150); 2360 ins_encode %{ 2361 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2362 %} 2363 ins_pipe(pipe_slow); 2364 %} 2365 2366 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 2367 predicate(UseAVX > 0); 2368 match(Set dst (SubF src1 (LoadF src2))); 2369 2370 format %{ "vsubss $dst, $src1, $src2" %} 2371 ins_cost(150); 2372 ins_encode %{ 2373 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2374 %} 2375 ins_pipe(pipe_slow); 2376 %} 2377 2378 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 2379 predicate(UseAVX > 0); 2380 match(Set dst (SubF src con)); 2381 2382 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2383 ins_cost(150); 2384 ins_encode %{ 2385 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2386 %} 2387 ins_pipe(pipe_slow); 2388 %} 2389 2390 instruct subD_reg(regD dst, regD src) %{ 2391 predicate((UseSSE>=2) && (UseAVX == 0)); 2392 match(Set dst (SubD dst src)); 2393 2394 format %{ "subsd $dst, $src" %} 2395 ins_cost(150); 2396 ins_encode %{ 2397 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 2398 %} 2399 
ins_pipe(pipe_slow); 2400 %} 2401 2402 instruct subD_mem(regD dst, memory src) %{ 2403 predicate((UseSSE>=2) && (UseAVX == 0)); 2404 match(Set dst (SubD dst (LoadD src))); 2405 2406 format %{ "subsd $dst, $src" %} 2407 ins_cost(150); 2408 ins_encode %{ 2409 __ subsd($dst$$XMMRegister, $src$$Address); 2410 %} 2411 ins_pipe(pipe_slow); 2412 %} 2413 2414 instruct subD_imm(regD dst, immD con) %{ 2415 predicate((UseSSE>=2) && (UseAVX == 0)); 2416 match(Set dst (SubD dst con)); 2417 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2418 ins_cost(150); 2419 ins_encode %{ 2420 __ subsd($dst$$XMMRegister, $constantaddress($con)); 2421 %} 2422 ins_pipe(pipe_slow); 2423 %} 2424 2425 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 2426 predicate(UseAVX > 0); 2427 match(Set dst (SubD src1 src2)); 2428 2429 format %{ "vsubsd $dst, $src1, $src2" %} 2430 ins_cost(150); 2431 ins_encode %{ 2432 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2433 %} 2434 ins_pipe(pipe_slow); 2435 %} 2436 2437 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 2438 predicate(UseAVX > 0); 2439 match(Set dst (SubD src1 (LoadD src2))); 2440 2441 format %{ "vsubsd $dst, $src1, $src2" %} 2442 ins_cost(150); 2443 ins_encode %{ 2444 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2445 %} 2446 ins_pipe(pipe_slow); 2447 %} 2448 2449 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 2450 predicate(UseAVX > 0); 2451 match(Set dst (SubD src con)); 2452 2453 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2454 ins_cost(150); 2455 ins_encode %{ 2456 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2457 %} 2458 ins_pipe(pipe_slow); 2459 %} 2460 2461 instruct mulF_reg(regF dst, regF src) %{ 2462 predicate((UseSSE>=1) && (UseAVX == 0)); 2463 match(Set dst (MulF dst src)); 2464 2465 format %{ "mulss $dst, $src" %} 2466 ins_cost(150); 2467 ins_encode %{ 2468 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 2469 %} 2470 ins_pipe(pipe_slow); 2471 %} 2472 2473 instruct mulF_mem(regF dst, memory src) %{ 2474 predicate((UseSSE>=1) && (UseAVX == 0)); 2475 match(Set dst (MulF dst (LoadF src))); 2476 2477 format %{ "mulss $dst, $src" %} 2478 ins_cost(150); 2479 ins_encode %{ 2480 __ mulss($dst$$XMMRegister, $src$$Address); 2481 %} 2482 ins_pipe(pipe_slow); 2483 %} 2484 2485 instruct mulF_imm(regF dst, immF con) %{ 2486 predicate((UseSSE>=1) && (UseAVX == 0)); 2487 match(Set dst (MulF dst con)); 2488 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2489 ins_cost(150); 2490 ins_encode %{ 2491 __ mulss($dst$$XMMRegister, $constantaddress($con)); 2492 %} 2493 ins_pipe(pipe_slow); 2494 %} 2495 2496 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 2497 predicate(UseAVX > 0); 2498 match(Set dst (MulF src1 src2)); 2499 2500 format %{ "vmulss $dst, $src1, $src2" %} 2501 ins_cost(150); 2502 ins_encode %{ 2503 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2504 %} 2505 ins_pipe(pipe_slow); 2506 %} 2507 2508 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 2509 predicate(UseAVX > 0); 2510 match(Set dst (MulF src1 (LoadF src2))); 2511 2512 format %{ "vmulss $dst, $src1, $src2" %} 2513 ins_cost(150); 2514 ins_encode %{ 2515 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2516 %} 2517 ins_pipe(pipe_slow); 2518 %} 2519 2520 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 2521 predicate(UseAVX > 0); 
2522 match(Set dst (MulF src con)); 2523 2524 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2525 ins_cost(150); 2526 ins_encode %{ 2527 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2528 %} 2529 ins_pipe(pipe_slow); 2530 %} 2531 2532 instruct mulD_reg(regD dst, regD src) %{ 2533 predicate((UseSSE>=2) && (UseAVX == 0)); 2534 match(Set dst (MulD dst src)); 2535 2536 format %{ "mulsd $dst, $src" %} 2537 ins_cost(150); 2538 ins_encode %{ 2539 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 2540 %} 2541 ins_pipe(pipe_slow); 2542 %} 2543 2544 instruct mulD_mem(regD dst, memory src) %{ 2545 predicate((UseSSE>=2) && (UseAVX == 0)); 2546 match(Set dst (MulD dst (LoadD src))); 2547 2548 format %{ "mulsd $dst, $src" %} 2549 ins_cost(150); 2550 ins_encode %{ 2551 __ mulsd($dst$$XMMRegister, $src$$Address); 2552 %} 2553 ins_pipe(pipe_slow); 2554 %} 2555 2556 instruct mulD_imm(regD dst, immD con) %{ 2557 predicate((UseSSE>=2) && (UseAVX == 0)); 2558 match(Set dst (MulD dst con)); 2559 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2560 ins_cost(150); 2561 ins_encode %{ 2562 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 2563 %} 2564 ins_pipe(pipe_slow); 2565 %} 2566 2567 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 2568 predicate(UseAVX > 0); 2569 match(Set dst (MulD src1 src2)); 2570 2571 format %{ "vmulsd $dst, $src1, $src2" %} 2572 ins_cost(150); 2573 ins_encode %{ 2574 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2575 %} 2576 ins_pipe(pipe_slow); 2577 %} 2578 2579 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 2580 predicate(UseAVX > 0); 2581 match(Set dst (MulD src1 (LoadD src2))); 2582 2583 format %{ "vmulsd $dst, $src1, $src2" %} 2584 ins_cost(150); 2585 ins_encode %{ 2586 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2587 %} 2588 ins_pipe(pipe_slow); 2589 %} 2590 2591 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 2592 predicate(UseAVX > 0); 2593 match(Set dst (MulD src con)); 2594 2595 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2596 ins_cost(150); 2597 ins_encode %{ 2598 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2599 %} 2600 ins_pipe(pipe_slow); 2601 %} 2602 2603 instruct divF_reg(regF dst, regF src) %{ 2604 predicate((UseSSE>=1) && (UseAVX == 0)); 2605 match(Set dst (DivF dst src)); 2606 2607 format %{ "divss $dst, $src" %} 2608 ins_cost(150); 2609 ins_encode %{ 2610 __ divss($dst$$XMMRegister, $src$$XMMRegister); 2611 %} 2612 ins_pipe(pipe_slow); 2613 %} 2614 2615 instruct divF_mem(regF dst, memory src) %{ 2616 predicate((UseSSE>=1) && (UseAVX == 0)); 2617 match(Set dst (DivF dst (LoadF src))); 2618 2619 format %{ "divss $dst, $src" %} 2620 ins_cost(150); 2621 ins_encode %{ 2622 __ divss($dst$$XMMRegister, $src$$Address); 2623 %} 2624 ins_pipe(pipe_slow); 2625 %} 2626 2627 instruct divF_imm(regF dst, immF con) %{ 2628 predicate((UseSSE>=1) && (UseAVX == 0)); 2629 match(Set dst (DivF dst con)); 2630 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2631 ins_cost(150); 2632 ins_encode %{ 2633 __ divss($dst$$XMMRegister, $constantaddress($con)); 2634 %} 2635 ins_pipe(pipe_slow); 2636 %} 2637 2638 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 2639 predicate(UseAVX > 0); 2640 match(Set dst (DivF src1 src2)); 2641 2642 format %{ "vdivss $dst, $src1, $src2" %} 2643 ins_cost(150); 
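  // vdivss is the VEX-encoded three-operand form: dst, src1 and src2 may all
  // be different registers, so unlike the destructive UseAVX == 0 rules above
  // no copy of src1 is needed when it stays live across the divide.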
2644 ins_encode %{ 2645 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2646 %} 2647 ins_pipe(pipe_slow); 2648 %} 2649 2650 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 2651 predicate(UseAVX > 0); 2652 match(Set dst (DivF src1 (LoadF src2))); 2653 2654 format %{ "vdivss $dst, $src1, $src2" %} 2655 ins_cost(150); 2656 ins_encode %{ 2657 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2658 %} 2659 ins_pipe(pipe_slow); 2660 %} 2661 2662 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 2663 predicate(UseAVX > 0); 2664 match(Set dst (DivF src con)); 2665 2666 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2667 ins_cost(150); 2668 ins_encode %{ 2669 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2670 %} 2671 ins_pipe(pipe_slow); 2672 %} 2673 2674 instruct divD_reg(regD dst, regD src) %{ 2675 predicate((UseSSE>=2) && (UseAVX == 0)); 2676 match(Set dst (DivD dst src)); 2677 2678 format %{ "divsd $dst, $src" %} 2679 ins_cost(150); 2680 ins_encode %{ 2681 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 2682 %} 2683 ins_pipe(pipe_slow); 2684 %} 2685 2686 instruct divD_mem(regD dst, memory src) %{ 2687 predicate((UseSSE>=2) && (UseAVX == 0)); 2688 match(Set dst (DivD dst (LoadD src))); 2689 2690 format %{ "divsd $dst, $src" %} 2691 ins_cost(150); 2692 ins_encode %{ 2693 __ divsd($dst$$XMMRegister, $src$$Address); 2694 %} 2695 ins_pipe(pipe_slow); 2696 %} 2697 2698 instruct divD_imm(regD dst, immD con) %{ 2699 predicate((UseSSE>=2) && (UseAVX == 0)); 2700 match(Set dst (DivD dst con)); 2701 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2702 ins_cost(150); 2703 ins_encode %{ 2704 __ divsd($dst$$XMMRegister, $constantaddress($con)); 2705 %} 2706 ins_pipe(pipe_slow); 2707 %} 2708 2709 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 2710 predicate(UseAVX > 0); 2711 match(Set dst (DivD src1 src2)); 2712 2713 format %{ "vdivsd $dst, $src1, $src2" %} 2714 ins_cost(150); 2715 ins_encode %{ 2716 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2717 %} 2718 ins_pipe(pipe_slow); 2719 %} 2720 2721 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 2722 predicate(UseAVX > 0); 2723 match(Set dst (DivD src1 (LoadD src2))); 2724 2725 format %{ "vdivsd $dst, $src1, $src2" %} 2726 ins_cost(150); 2727 ins_encode %{ 2728 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2729 %} 2730 ins_pipe(pipe_slow); 2731 %} 2732 2733 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 2734 predicate(UseAVX > 0); 2735 match(Set dst (DivD src con)); 2736 2737 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2738 ins_cost(150); 2739 ins_encode %{ 2740 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2741 %} 2742 ins_pipe(pipe_slow); 2743 %} 2744 2745 instruct absF_reg(regF dst) %{ 2746 predicate((UseSSE>=1) && (UseAVX == 0)); 2747 match(Set dst (AbsF dst)); 2748 ins_cost(150); 2749 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} 2750 ins_encode %{ 2751 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 2752 %} 2753 ins_pipe(pipe_slow); 2754 %} 2755 2756 instruct absF_reg_reg(regF dst, regF src) %{ 2757 predicate(VM_Version::supports_avxonly()); 2758 match(Set dst (AbsF src)); 2759 ins_cost(150); 2760 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 2761 ins_encode %{ 2762 int 
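    // float_signmask() points at a constant with 0x7fffffff in each 32-bit
    // lane; andps with it clears only the sign bit, which is IEEE-754 abs()
    // for every input, NaN and infinity included. The same scalar bit trick,
    // for illustration:
    //   bits &= 0x7fffffffu;  // abs: clear the sign bit
    //   bits ^= 0x80000000u;  // negate: flip the sign bit (see negF/negD below)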
vector_len = 0;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

#ifdef _LP64
instruct absF_reg_reg_evex(regF dst, regF src) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

instruct absF_reg_reg_evex_special(regF dst, regF src1, regF src2) %{
  predicate(VM_Version::supports_avx512novl());
  match(Set dst (AbsF src1));
  effect(TEMP src2);
  ins_cost(150);
  format %{ "vabsss $dst, $src1, $src2, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vabsss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
#else // _LP64
instruct absF_reg_reg_evex(regF dst, regF src) %{
  predicate(UseAVX > 2);
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
#endif

instruct absD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AbsD dst));
  ins_cost(150);
  format %{ "andpd $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg_reg(regD dst, regD src) %{
  predicate(VM_Version::supports_avxonly());
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

#ifdef _LP64
instruct absD_reg_reg_evex(regD dst, regD src) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg_reg_evex_special(regD dst, regD src1, regD src2) %{
  predicate(VM_Version::supports_avx512novl());
  match(Set dst (AbsD src1));
  effect(TEMP src2);
  ins_cost(150);
  format %{ "vabssd $dst, $src1, $src2, [0x7fffffffffffffff]\t# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vabssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
#else // _LP64
instruct absD_reg_reg_evex(regD dst, regD src) %{
  predicate(UseAVX > 2);
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
#endif

instruct negF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (NegF dst));
  ins_cost(150);
  format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negF_reg_reg(regF dst, regF src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegF src));
  ins_cost(150);
  format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (NegD dst));
  ins_cost(150);
  format %{ "xorpd $dst, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg_reg(regD dst, regD src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegD src));
  ins_cost(150);
  format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_reg(regF dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_mem(regF dst, memory src) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_imm(regF dst, immF con) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D con))));
  format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_reg(regD dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD src));

  format %{ "sqrtsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_mem(regD dst, memory src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD (LoadD src)));

  format %{ "sqrtsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_imm(regD dst, immD con) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD con));
  format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table:
double=$con" %} 2992 ins_cost(150); 2993 ins_encode %{ 2994 __ sqrtsd($dst$$XMMRegister, $constantaddress($con)); 2995 %} 2996 ins_pipe(pipe_slow); 2997 %} 2998 2999 // ====================VECTOR INSTRUCTIONS===================================== 3000 3001 // Load vectors (4 bytes long) 3002 instruct loadV4(vecS dst, memory mem) %{ 3003 predicate(n->as_LoadVector()->memory_size() == 4); 3004 match(Set dst (LoadVector mem)); 3005 ins_cost(125); 3006 format %{ "movd $dst,$mem\t! load vector (4 bytes)" %} 3007 ins_encode %{ 3008 __ movdl($dst$$XMMRegister, $mem$$Address); 3009 %} 3010 ins_pipe( pipe_slow ); 3011 %} 3012 3013 // Load vectors (8 bytes long) 3014 instruct loadV8(vecD dst, memory mem) %{ 3015 predicate(n->as_LoadVector()->memory_size() == 8); 3016 match(Set dst (LoadVector mem)); 3017 ins_cost(125); 3018 format %{ "movq $dst,$mem\t! load vector (8 bytes)" %} 3019 ins_encode %{ 3020 __ movq($dst$$XMMRegister, $mem$$Address); 3021 %} 3022 ins_pipe( pipe_slow ); 3023 %} 3024 3025 // Load vectors (16 bytes long) 3026 instruct loadV16(vecX dst, memory mem) %{ 3027 predicate(n->as_LoadVector()->memory_size() == 16); 3028 match(Set dst (LoadVector mem)); 3029 ins_cost(125); 3030 format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %} 3031 ins_encode %{ 3032 __ movdqu($dst$$XMMRegister, $mem$$Address); 3033 %} 3034 ins_pipe( pipe_slow ); 3035 %} 3036 3037 // Load vectors (32 bytes long) 3038 instruct loadV32(vecY dst, memory mem) %{ 3039 predicate(n->as_LoadVector()->memory_size() == 32); 3040 match(Set dst (LoadVector mem)); 3041 ins_cost(125); 3042 format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %} 3043 ins_encode %{ 3044 __ vmovdqu($dst$$XMMRegister, $mem$$Address); 3045 %} 3046 ins_pipe( pipe_slow ); 3047 %} 3048 3049 // Load vectors (64 bytes long) 3050 instruct loadV64(vecZ dst, memory mem) %{ 3051 predicate(n->as_LoadVector()->memory_size() == 64); 3052 match(Set dst (LoadVector mem)); 3053 ins_cost(125); 3054 format %{ "vmovdqu $dst k0,$mem\t! load vector (64 bytes)" %} 3055 ins_encode %{ 3056 int vector_len = 2; 3057 __ evmovdqul($dst$$XMMRegister, $mem$$Address, vector_len); 3058 %} 3059 ins_pipe( pipe_slow ); 3060 %} 3061 3062 // Store vectors 3063 instruct storeV4(memory mem, vecS src) %{ 3064 predicate(n->as_StoreVector()->memory_size() == 4); 3065 match(Set mem (StoreVector mem src)); 3066 ins_cost(145); 3067 format %{ "movd $mem,$src\t! store vector (4 bytes)" %} 3068 ins_encode %{ 3069 __ movdl($mem$$Address, $src$$XMMRegister); 3070 %} 3071 ins_pipe( pipe_slow ); 3072 %} 3073 3074 instruct storeV8(memory mem, vecD src) %{ 3075 predicate(n->as_StoreVector()->memory_size() == 8); 3076 match(Set mem (StoreVector mem src)); 3077 ins_cost(145); 3078 format %{ "movq $mem,$src\t! store vector (8 bytes)" %} 3079 ins_encode %{ 3080 __ movq($mem$$Address, $src$$XMMRegister); 3081 %} 3082 ins_pipe( pipe_slow ); 3083 %} 3084 3085 instruct storeV16(memory mem, vecX src) %{ 3086 predicate(n->as_StoreVector()->memory_size() == 16); 3087 match(Set mem (StoreVector mem src)); 3088 ins_cost(145); 3089 format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %} 3090 ins_encode %{ 3091 __ movdqu($mem$$Address, $src$$XMMRegister); 3092 %} 3093 ins_pipe( pipe_slow ); 3094 %} 3095 3096 instruct storeV32(memory mem, vecY src) %{ 3097 predicate(n->as_StoreVector()->memory_size() == 32); 3098 match(Set mem (StoreVector mem src)); 3099 ins_cost(145); 3100 format %{ "vmovdqu $mem,$src\t! 
store vector (32 bytes)" %} 3101 ins_encode %{ 3102 __ vmovdqu($mem$$Address, $src$$XMMRegister); 3103 %} 3104 ins_pipe( pipe_slow ); 3105 %} 3106 3107 instruct storeV64(memory mem, vecZ src) %{ 3108 predicate(n->as_StoreVector()->memory_size() == 64); 3109 match(Set mem (StoreVector mem src)); 3110 ins_cost(145); 3111 format %{ "vmovdqu $mem k0,$src\t! store vector (64 bytes)" %} 3112 ins_encode %{ 3113 int vector_len = 2; 3114 __ evmovdqul($mem$$Address, $src$$XMMRegister, vector_len); 3115 %} 3116 ins_pipe( pipe_slow ); 3117 %} 3118 3119 // ====================LEGACY REPLICATE======================================= 3120 3121 instruct Repl4B_mem(vecS dst, memory mem) %{ 3122 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3123 match(Set dst (ReplicateB (LoadB mem))); 3124 format %{ "punpcklbw $dst,$mem\n\t" 3125 "pshuflw $dst,$dst,0x00\t! replicate4B" %} 3126 ins_encode %{ 3127 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3128 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3129 %} 3130 ins_pipe( pipe_slow ); 3131 %} 3132 3133 instruct Repl8B_mem(vecD dst, memory mem) %{ 3134 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3135 match(Set dst (ReplicateB (LoadB mem))); 3136 format %{ "punpcklbw $dst,$mem\n\t" 3137 "pshuflw $dst,$dst,0x00\t! replicate8B" %} 3138 ins_encode %{ 3139 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3140 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3141 %} 3142 ins_pipe( pipe_slow ); 3143 %} 3144 3145 instruct Repl16B(vecX dst, rRegI src) %{ 3146 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3147 match(Set dst (ReplicateB src)); 3148 format %{ "movd $dst,$src\n\t" 3149 "punpcklbw $dst,$dst\n\t" 3150 "pshuflw $dst,$dst,0x00\n\t" 3151 "punpcklqdq $dst,$dst\t! replicate16B" %} 3152 ins_encode %{ 3153 __ movdl($dst$$XMMRegister, $src$$Register); 3154 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3155 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3156 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3157 %} 3158 ins_pipe( pipe_slow ); 3159 %} 3160 3161 instruct Repl16B_mem(vecX dst, memory mem) %{ 3162 predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3163 match(Set dst (ReplicateB (LoadB mem))); 3164 format %{ "punpcklbw $dst,$mem\n\t" 3165 "pshuflw $dst,$dst,0x00\n\t" 3166 "punpcklqdq $dst,$dst\t! replicate16B" %} 3167 ins_encode %{ 3168 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3169 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3170 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3171 %} 3172 ins_pipe( pipe_slow ); 3173 %} 3174 3175 instruct Repl32B(vecY dst, rRegI src) %{ 3176 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3177 match(Set dst (ReplicateB src)); 3178 format %{ "movd $dst,$src\n\t" 3179 "punpcklbw $dst,$dst\n\t" 3180 "pshuflw $dst,$dst,0x00\n\t" 3181 "punpcklqdq $dst,$dst\n\t" 3182 "vinserti128h $dst,$dst,$dst\t! 
replicate32B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "punpcklbw $dst,$mem\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate32B" %}
  ins_encode %{
    __ punpcklbw($dst$$XMMRegister, $mem$$Address);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate16B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate32B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4S(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4S_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "pshuflw $dst,$mem,0x00\t! replicate4S" %}
  ins_encode %{
    __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\t!
replicate8S" %} 3263 ins_encode %{ 3264 __ movdl($dst$$XMMRegister, $src$$Register); 3265 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3266 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3267 %} 3268 ins_pipe( pipe_slow ); 3269 %} 3270 3271 instruct Repl8S_mem(vecX dst, memory mem) %{ 3272 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3273 match(Set dst (ReplicateS (LoadS mem))); 3274 format %{ "pshuflw $dst,$mem,0x00\n\t" 3275 "punpcklqdq $dst,$dst\t! replicate8S" %} 3276 ins_encode %{ 3277 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3278 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3279 %} 3280 ins_pipe( pipe_slow ); 3281 %} 3282 3283 instruct Repl8S_imm(vecX dst, immI con) %{ 3284 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw()); 3285 match(Set dst (ReplicateS con)); 3286 format %{ "movq $dst,[$constantaddress]\n\t" 3287 "punpcklqdq $dst,$dst\t! replicate8S($con)" %} 3288 ins_encode %{ 3289 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3290 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3291 %} 3292 ins_pipe( pipe_slow ); 3293 %} 3294 3295 instruct Repl16S(vecY dst, rRegI src) %{ 3296 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3297 match(Set dst (ReplicateS src)); 3298 format %{ "movd $dst,$src\n\t" 3299 "pshuflw $dst,$dst,0x00\n\t" 3300 "punpcklqdq $dst,$dst\n\t" 3301 "vinserti128h $dst,$dst,$dst\t! replicate16S" %} 3302 ins_encode %{ 3303 __ movdl($dst$$XMMRegister, $src$$Register); 3304 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3305 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3306 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3307 %} 3308 ins_pipe( pipe_slow ); 3309 %} 3310 3311 instruct Repl16S_mem(vecY dst, memory mem) %{ 3312 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3313 match(Set dst (ReplicateS (LoadS mem))); 3314 format %{ "pshuflw $dst,$mem,0x00\n\t" 3315 "punpcklqdq $dst,$dst\n\t" 3316 "vinserti128h $dst,$dst,$dst\t! replicate16S" %} 3317 ins_encode %{ 3318 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3319 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3320 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3321 %} 3322 ins_pipe( pipe_slow ); 3323 %} 3324 3325 instruct Repl16S_imm(vecY dst, immI con) %{ 3326 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3327 match(Set dst (ReplicateS con)); 3328 format %{ "movq $dst,[$constantaddress]\n\t" 3329 "punpcklqdq $dst,$dst\n\t" 3330 "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %} 3331 ins_encode %{ 3332 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3333 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3334 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3335 %} 3336 ins_pipe( pipe_slow ); 3337 %} 3338 3339 instruct Repl4I(vecX dst, rRegI src) %{ 3340 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3341 match(Set dst (ReplicateI src)); 3342 format %{ "movd $dst,$src\n\t" 3343 "pshufd $dst,$dst,0x00\t! 
replicate4I" %} 3344 ins_encode %{ 3345 __ movdl($dst$$XMMRegister, $src$$Register); 3346 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3347 %} 3348 ins_pipe( pipe_slow ); 3349 %} 3350 3351 instruct Repl4I_mem(vecX dst, memory mem) %{ 3352 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3353 match(Set dst (ReplicateI (LoadI mem))); 3354 format %{ "pshufd $dst,$mem,0x00\t! replicate4I" %} 3355 ins_encode %{ 3356 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3357 %} 3358 ins_pipe( pipe_slow ); 3359 %} 3360 3361 instruct Repl8I(vecY dst, rRegI src) %{ 3362 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3363 match(Set dst (ReplicateI src)); 3364 format %{ "movd $dst,$src\n\t" 3365 "pshufd $dst,$dst,0x00\n\t" 3366 "vinserti128h $dst,$dst,$dst\t! replicate8I" %} 3367 ins_encode %{ 3368 __ movdl($dst$$XMMRegister, $src$$Register); 3369 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3370 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3371 %} 3372 ins_pipe( pipe_slow ); 3373 %} 3374 3375 instruct Repl8I_mem(vecY dst, memory mem) %{ 3376 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3377 match(Set dst (ReplicateI (LoadI mem))); 3378 format %{ "pshufd $dst,$mem,0x00\n\t" 3379 "vinserti128h $dst,$dst,$dst\t! replicate8I" %} 3380 ins_encode %{ 3381 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3382 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3383 %} 3384 ins_pipe( pipe_slow ); 3385 %} 3386 3387 instruct Repl4I_imm(vecX dst, immI con) %{ 3388 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3389 match(Set dst (ReplicateI con)); 3390 format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t" 3391 "punpcklqdq $dst,$dst" %} 3392 ins_encode %{ 3393 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3394 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3395 %} 3396 ins_pipe( pipe_slow ); 3397 %} 3398 3399 instruct Repl8I_imm(vecY dst, immI con) %{ 3400 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3401 match(Set dst (ReplicateI con)); 3402 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 3403 "punpcklqdq $dst,$dst\n\t" 3404 "vinserti128h $dst,$dst,$dst" %} 3405 ins_encode %{ 3406 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3407 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3408 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3409 %} 3410 ins_pipe( pipe_slow ); 3411 %} 3412 3413 // Long could be loaded into xmm register directly from memory. 3414 instruct Repl2L_mem(vecX dst, memory mem) %{ 3415 predicate(n->as_Vector()->length() == 2 && !VM_Version::supports_avx512vlbw()); 3416 match(Set dst (ReplicateL (LoadL mem))); 3417 format %{ "movq $dst,$mem\n\t" 3418 "punpcklqdq $dst,$dst\t! replicate2L" %} 3419 ins_encode %{ 3420 __ movq($dst$$XMMRegister, $mem$$Address); 3421 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3422 %} 3423 ins_pipe( pipe_slow ); 3424 %} 3425 3426 // Replicate long (8 byte) scalar to be vector 3427 #ifdef _LP64 3428 instruct Repl4L(vecY dst, rRegL src) %{ 3429 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3430 match(Set dst (ReplicateL src)); 3431 format %{ "movdq $dst,$src\n\t" 3432 "punpcklqdq $dst,$dst\n\t" 3433 "vinserti128h $dst,$dst,$dst\t! 
replicate4L" %} 3434 ins_encode %{ 3435 __ movdq($dst$$XMMRegister, $src$$Register); 3436 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3437 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3438 %} 3439 ins_pipe( pipe_slow ); 3440 %} 3441 #else // _LP64 3442 instruct Repl4L(vecY dst, eRegL src, regD tmp) %{ 3443 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3444 match(Set dst (ReplicateL src)); 3445 effect(TEMP dst, USE src, TEMP tmp); 3446 format %{ "movdl $dst,$src.lo\n\t" 3447 "movdl $tmp,$src.hi\n\t" 3448 "punpckldq $dst,$tmp\n\t" 3449 "punpcklqdq $dst,$dst\n\t" 3450 "vinserti128h $dst,$dst,$dst\t! replicate4L" %} 3451 ins_encode %{ 3452 __ movdl($dst$$XMMRegister, $src$$Register); 3453 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3454 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3455 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3456 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3457 %} 3458 ins_pipe( pipe_slow ); 3459 %} 3460 #endif // _LP64 3461 3462 instruct Repl4L_imm(vecY dst, immL con) %{ 3463 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3464 match(Set dst (ReplicateL con)); 3465 format %{ "movq $dst,[$constantaddress]\n\t" 3466 "punpcklqdq $dst,$dst\n\t" 3467 "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %} 3468 ins_encode %{ 3469 __ movq($dst$$XMMRegister, $constantaddress($con)); 3470 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3471 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3472 %} 3473 ins_pipe( pipe_slow ); 3474 %} 3475 3476 instruct Repl4L_mem(vecY dst, memory mem) %{ 3477 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3478 match(Set dst (ReplicateL (LoadL mem))); 3479 format %{ "movq $dst,$mem\n\t" 3480 "punpcklqdq $dst,$dst\n\t" 3481 "vinserti128h $dst,$dst,$dst\t! replicate4L" %} 3482 ins_encode %{ 3483 __ movq($dst$$XMMRegister, $mem$$Address); 3484 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3485 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3486 %} 3487 ins_pipe( pipe_slow ); 3488 %} 3489 3490 instruct Repl2F_mem(vecD dst, memory mem) %{ 3491 predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3492 match(Set dst (ReplicateF (LoadF mem))); 3493 format %{ "pshufd $dst,$mem,0x00\t! replicate2F" %} 3494 ins_encode %{ 3495 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3496 %} 3497 ins_pipe( pipe_slow ); 3498 %} 3499 3500 instruct Repl4F_mem(vecX dst, memory mem) %{ 3501 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3502 match(Set dst (ReplicateF (LoadF mem))); 3503 format %{ "pshufd $dst,$mem,0x00\t! replicate4F" %} 3504 ins_encode %{ 3505 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3506 %} 3507 ins_pipe( pipe_slow ); 3508 %} 3509 3510 instruct Repl8F(vecY dst, regF src) %{ 3511 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3512 match(Set dst (ReplicateF src)); 3513 format %{ "pshufd $dst,$src,0x00\n\t" 3514 "vinsertf128h $dst,$dst,$dst\t! 
replicate8F" %} 3515 ins_encode %{ 3516 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3517 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3518 %} 3519 ins_pipe( pipe_slow ); 3520 %} 3521 3522 instruct Repl8F_mem(vecY dst, memory mem) %{ 3523 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3524 match(Set dst (ReplicateF (LoadF mem))); 3525 format %{ "pshufd $dst,$mem,0x00\n\t" 3526 "vinsertf128h $dst,$dst,$dst\t! replicate8F" %} 3527 ins_encode %{ 3528 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3529 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3530 %} 3531 ins_pipe( pipe_slow ); 3532 %} 3533 3534 instruct Repl2F_zero(vecD dst, immF0 zero) %{ 3535 predicate(n->as_Vector()->length() == 2 && UseAVX < 3); 3536 match(Set dst (ReplicateF zero)); 3537 format %{ "xorps $dst,$dst\t! replicate2F zero" %} 3538 ins_encode %{ 3539 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 3540 %} 3541 ins_pipe( fpu_reg_reg ); 3542 %} 3543 3544 instruct Repl4F_zero(vecX dst, immF0 zero) %{ 3545 predicate(n->as_Vector()->length() == 4 && UseAVX < 3); 3546 match(Set dst (ReplicateF zero)); 3547 format %{ "xorps $dst,$dst\t! replicate4F zero" %} 3548 ins_encode %{ 3549 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 3550 %} 3551 ins_pipe( fpu_reg_reg ); 3552 %} 3553 3554 instruct Repl8F_zero(vecY dst, immF0 zero) %{ 3555 predicate(n->as_Vector()->length() == 8 && UseAVX < 3); 3556 match(Set dst (ReplicateF zero)); 3557 format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %} 3558 ins_encode %{ 3559 int vector_len = 1; 3560 __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3561 %} 3562 ins_pipe( fpu_reg_reg ); 3563 %} 3564 3565 instruct Repl2D_mem(vecX dst, memory mem) %{ 3566 predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3567 match(Set dst (ReplicateD (LoadD mem))); 3568 format %{ "pshufd $dst,$mem,0x44\t! replicate2D" %} 3569 ins_encode %{ 3570 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3571 %} 3572 ins_pipe( pipe_slow ); 3573 %} 3574 3575 instruct Repl4D(vecY dst, regD src) %{ 3576 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3577 match(Set dst (ReplicateD src)); 3578 format %{ "pshufd $dst,$src,0x44\n\t" 3579 "vinsertf128h $dst,$dst,$dst\t! replicate4D" %} 3580 ins_encode %{ 3581 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3582 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3583 %} 3584 ins_pipe( pipe_slow ); 3585 %} 3586 3587 instruct Repl4D_mem(vecY dst, memory mem) %{ 3588 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3589 match(Set dst (ReplicateD (LoadD mem))); 3590 format %{ "pshufd $dst,$mem,0x44\n\t" 3591 "vinsertf128h $dst,$dst,$dst\t! replicate4D" %} 3592 ins_encode %{ 3593 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3594 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3595 %} 3596 ins_pipe( pipe_slow ); 3597 %} 3598 3599 // Replicate double (8 byte) scalar zero to be vector 3600 instruct Repl2D_zero(vecX dst, immD0 zero) %{ 3601 predicate(n->as_Vector()->length() == 2 && UseAVX < 3); 3602 match(Set dst (ReplicateD zero)); 3603 format %{ "xorpd $dst,$dst\t! 
replicate2D zero" %} 3604 ins_encode %{ 3605 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister); 3606 %} 3607 ins_pipe( fpu_reg_reg ); 3608 %} 3609 3610 instruct Repl4D_zero(vecY dst, immD0 zero) %{ 3611 predicate(n->as_Vector()->length() == 4 && UseAVX < 3); 3612 match(Set dst (ReplicateD zero)); 3613 format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %} 3614 ins_encode %{ 3615 int vector_len = 1; 3616 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3617 %} 3618 ins_pipe( fpu_reg_reg ); 3619 %} 3620 3621 // ====================GENERIC REPLICATE========================================== 3622 3623 // Replicate byte scalar to be vector 3624 instruct Repl4B(vecS dst, rRegI src) %{ 3625 predicate(n->as_Vector()->length() == 4); 3626 match(Set dst (ReplicateB src)); 3627 format %{ "movd $dst,$src\n\t" 3628 "punpcklbw $dst,$dst\n\t" 3629 "pshuflw $dst,$dst,0x00\t! replicate4B" %} 3630 ins_encode %{ 3631 __ movdl($dst$$XMMRegister, $src$$Register); 3632 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3633 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3634 %} 3635 ins_pipe( pipe_slow ); 3636 %} 3637 3638 instruct Repl8B(vecD dst, rRegI src) %{ 3639 predicate(n->as_Vector()->length() == 8); 3640 match(Set dst (ReplicateB src)); 3641 format %{ "movd $dst,$src\n\t" 3642 "punpcklbw $dst,$dst\n\t" 3643 "pshuflw $dst,$dst,0x00\t! replicate8B" %} 3644 ins_encode %{ 3645 __ movdl($dst$$XMMRegister, $src$$Register); 3646 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3647 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3648 %} 3649 ins_pipe( pipe_slow ); 3650 %} 3651 3652 // Replicate byte scalar immediate to be vector by loading from const table. 3653 instruct Repl4B_imm(vecS dst, immI con) %{ 3654 predicate(n->as_Vector()->length() == 4); 3655 match(Set dst (ReplicateB con)); 3656 format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %} 3657 ins_encode %{ 3658 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1))); 3659 %} 3660 ins_pipe( pipe_slow ); 3661 %} 3662 3663 instruct Repl8B_imm(vecD dst, immI con) %{ 3664 predicate(n->as_Vector()->length() == 8); 3665 match(Set dst (ReplicateB con)); 3666 format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %} 3667 ins_encode %{ 3668 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3669 %} 3670 ins_pipe( pipe_slow ); 3671 %} 3672 3673 // Replicate byte scalar zero to be vector 3674 instruct Repl4B_zero(vecS dst, immI0 zero) %{ 3675 predicate(n->as_Vector()->length() == 4); 3676 match(Set dst (ReplicateB zero)); 3677 format %{ "pxor $dst,$dst\t! replicate4B zero" %} 3678 ins_encode %{ 3679 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3680 %} 3681 ins_pipe( fpu_reg_reg ); 3682 %} 3683 3684 instruct Repl8B_zero(vecD dst, immI0 zero) %{ 3685 predicate(n->as_Vector()->length() == 8); 3686 match(Set dst (ReplicateB zero)); 3687 format %{ "pxor $dst,$dst\t! replicate8B zero" %} 3688 ins_encode %{ 3689 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3690 %} 3691 ins_pipe( fpu_reg_reg ); 3692 %} 3693 3694 instruct Repl16B_zero(vecX dst, immI0 zero) %{ 3695 predicate(n->as_Vector()->length() == 16); 3696 match(Set dst (ReplicateB zero)); 3697 format %{ "pxor $dst,$dst\t! 
replicate16B zero" %} 3698 ins_encode %{ 3699 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3700 %} 3701 ins_pipe( fpu_reg_reg ); 3702 %} 3703 3704 instruct Repl32B_zero(vecY dst, immI0 zero) %{ 3705 predicate(n->as_Vector()->length() == 32); 3706 match(Set dst (ReplicateB zero)); 3707 format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %} 3708 ins_encode %{ 3709 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 3710 int vector_len = 1; 3711 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3712 %} 3713 ins_pipe( fpu_reg_reg ); 3714 %} 3715 3716 // Replicate char/short (2 byte) scalar to be vector 3717 instruct Repl2S(vecS dst, rRegI src) %{ 3718 predicate(n->as_Vector()->length() == 2); 3719 match(Set dst (ReplicateS src)); 3720 format %{ "movd $dst,$src\n\t" 3721 "pshuflw $dst,$dst,0x00\t! replicate2S" %} 3722 ins_encode %{ 3723 __ movdl($dst$$XMMRegister, $src$$Register); 3724 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3725 %} 3726 ins_pipe( fpu_reg_reg ); 3727 %} 3728 3729 // Replicate char/short (2 byte) scalar immediate to be vector by loading from const table. 3730 instruct Repl2S_imm(vecS dst, immI con) %{ 3731 predicate(n->as_Vector()->length() == 2); 3732 match(Set dst (ReplicateS con)); 3733 format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %} 3734 ins_encode %{ 3735 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2))); 3736 %} 3737 ins_pipe( fpu_reg_reg ); 3738 %} 3739 3740 instruct Repl4S_imm(vecD dst, immI con) %{ 3741 predicate(n->as_Vector()->length() == 4); 3742 match(Set dst (ReplicateS con)); 3743 format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %} 3744 ins_encode %{ 3745 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3746 %} 3747 ins_pipe( fpu_reg_reg ); 3748 %} 3749 3750 // Replicate char/short (2 byte) scalar zero to be vector 3751 instruct Repl2S_zero(vecS dst, immI0 zero) %{ 3752 predicate(n->as_Vector()->length() == 2); 3753 match(Set dst (ReplicateS zero)); 3754 format %{ "pxor $dst,$dst\t! replicate2S zero" %} 3755 ins_encode %{ 3756 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3757 %} 3758 ins_pipe( fpu_reg_reg ); 3759 %} 3760 3761 instruct Repl4S_zero(vecD dst, immI0 zero) %{ 3762 predicate(n->as_Vector()->length() == 4); 3763 match(Set dst (ReplicateS zero)); 3764 format %{ "pxor $dst,$dst\t! replicate4S zero" %} 3765 ins_encode %{ 3766 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3767 %} 3768 ins_pipe( fpu_reg_reg ); 3769 %} 3770 3771 instruct Repl8S_zero(vecX dst, immI0 zero) %{ 3772 predicate(n->as_Vector()->length() == 8); 3773 match(Set dst (ReplicateS zero)); 3774 format %{ "pxor $dst,$dst\t! replicate8S zero" %} 3775 ins_encode %{ 3776 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3777 %} 3778 ins_pipe( fpu_reg_reg ); 3779 %} 3780 3781 instruct Repl16S_zero(vecY dst, immI0 zero) %{ 3782 predicate(n->as_Vector()->length() == 16); 3783 match(Set dst (ReplicateS zero)); 3784 format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %} 3785 ins_encode %{ 3786 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 
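    // pxor/vpxor of a register with itself is a recognized zero idiom: it has
    // no input dependency and is typically eliminated at register rename on
    // modern x86, which is why the Repl*_zero rules prefer it to loading a
    // zero constant from memory, e.g.
    //   xorps xmm0, xmm0   // 0.0f in all lanes, no constant-table load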
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar to be vector
instruct Repl2I(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Integer could be loaded into xmm register directly from memory.
instruct Repl2I_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
instruct Repl2I_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar zero to be vector
instruct Repl2I_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate2I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate4I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8I_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate long (8 byte) scalar to be vector
#ifdef _LP64
instruct Repl2L(vecX dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64

// Replicate long (8 byte) scalar immediate to be vector by loading from const table.
instruct Repl2L_imm(vecX dst, immL con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate long (8 byte) scalar zero to be vector
instruct Repl2L_zero(vecX dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL zero));
  format %{ "pxor $dst,$dst\t! replicate2L zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4L_zero(vecY dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate float (4 byte) scalar to be vector
instruct Repl2F(vecD dst, regF src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\t! replicate2F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F(vecX dst, regF src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\t! replicate4F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate double (8 bytes) scalar to be vector
instruct Repl2D(vecX dst, regD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\t! replicate2D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
  %}
  ins_pipe( pipe_slow );
%}

// ====================EVEX REPLICATE=============================================

instruct Repl4B_mem_evex(vecS dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate4B" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B_mem_evex(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t!
replicate8B" %} 3980 ins_encode %{ 3981 int vector_len = 0; 3982 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 3983 %} 3984 ins_pipe( pipe_slow ); 3985 %} 3986 3987 instruct Repl16B_evex(vecX dst, rRegI src) %{ 3988 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 3989 match(Set dst (ReplicateB src)); 3990 format %{ "vpbroadcastb $dst,$src\t! replicate16B" %} 3991 ins_encode %{ 3992 int vector_len = 0; 3993 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 3994 %} 3995 ins_pipe( pipe_slow ); 3996 %} 3997 3998 instruct Repl16B_mem_evex(vecX dst, memory mem) %{ 3999 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4000 match(Set dst (ReplicateB (LoadB mem))); 4001 format %{ "vpbroadcastb $dst,$mem\t! replicate16B" %} 4002 ins_encode %{ 4003 int vector_len = 0; 4004 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4005 %} 4006 ins_pipe( pipe_slow ); 4007 %} 4008 4009 instruct Repl32B_evex(vecY dst, rRegI src) %{ 4010 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); 4011 match(Set dst (ReplicateB src)); 4012 format %{ "vpbroadcastb $dst,$src\t! replicate32B" %} 4013 ins_encode %{ 4014 int vector_len = 1; 4015 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 4016 %} 4017 ins_pipe( pipe_slow ); 4018 %} 4019 4020 instruct Repl32B_mem_evex(vecY dst, memory mem) %{ 4021 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); 4022 match(Set dst (ReplicateB (LoadB mem))); 4023 format %{ "vpbroadcastb $dst,$mem\t! replicate32B" %} 4024 ins_encode %{ 4025 int vector_len = 1; 4026 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4027 %} 4028 ins_pipe( pipe_slow ); 4029 %} 4030 4031 instruct Repl64B_evex(vecZ dst, rRegI src) %{ 4032 predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw()); 4033 match(Set dst (ReplicateB src)); 4034 format %{ "vpbroadcastb $dst,$src\t! upper replicate64B" %} 4035 ins_encode %{ 4036 int vector_len = 2; 4037 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 4038 %} 4039 ins_pipe( pipe_slow ); 4040 %} 4041 4042 instruct Repl64B_mem_evex(vecZ dst, memory mem) %{ 4043 predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw()); 4044 match(Set dst (ReplicateB (LoadB mem))); 4045 format %{ "vpbroadcastb $dst,$mem\t! replicate64B" %} 4046 ins_encode %{ 4047 int vector_len = 2; 4048 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4049 %} 4050 ins_pipe( pipe_slow ); 4051 %} 4052 4053 instruct Repl16B_imm_evex(vecX dst, immI con) %{ 4054 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4055 match(Set dst (ReplicateB con)); 4056 format %{ "movq $dst,[$constantaddress]\n\t" 4057 "vpbroadcastb $dst,$dst\t! replicate16B" %} 4058 ins_encode %{ 4059 int vector_len = 0; 4060 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 4061 __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4062 %} 4063 ins_pipe( pipe_slow ); 4064 %} 4065 4066 instruct Repl32B_imm_evex(vecY dst, immI con) %{ 4067 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); 4068 match(Set dst (ReplicateB con)); 4069 format %{ "movq $dst,[$constantaddress]\n\t" 4070 "vpbroadcastb $dst,$dst\t! 
replicate32B" %} 4071 ins_encode %{ 4072 int vector_len = 1; 4073 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 4074 __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4075 %} 4076 ins_pipe( pipe_slow ); 4077 %} 4078 4079 instruct Repl64B_imm_evex(vecZ dst, immI con) %{ 4080 predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw()); 4081 match(Set dst (ReplicateB con)); 4082 format %{ "movq $dst,[$constantaddress]\n\t" 4083 "vpbroadcastb $dst,$dst\t! upper replicate64B" %} 4084 ins_encode %{ 4085 int vector_len = 2; 4086 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 4087 __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4088 %} 4089 ins_pipe( pipe_slow ); 4090 %} 4091 4092 instruct Repl64B_zero_evex(vecZ dst, immI0 zero) %{ 4093 predicate(n->as_Vector()->length() == 64 && UseAVX > 2); 4094 match(Set dst (ReplicateB zero)); 4095 format %{ "vpxor $dst k0,$dst,$dst\t! replicate64B zero" %} 4096 ins_encode %{ 4097 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 4098 int vector_len = 2; 4099 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4100 %} 4101 ins_pipe( fpu_reg_reg ); 4102 %} 4103 4104 instruct Repl4S_evex(vecD dst, rRegI src) %{ 4105 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw()); 4106 match(Set dst (ReplicateS src)); 4107 format %{ "vpbroadcastw $dst,$src\t! replicate4S" %} 4108 ins_encode %{ 4109 int vector_len = 0; 4110 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4111 %} 4112 ins_pipe( pipe_slow ); 4113 %} 4114 4115 instruct Repl4S_mem_evex(vecD dst, memory mem) %{ 4116 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw()); 4117 match(Set dst (ReplicateS (LoadS mem))); 4118 format %{ "vpbroadcastw $dst,$mem\t! replicate4S" %} 4119 ins_encode %{ 4120 int vector_len = 0; 4121 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4122 %} 4123 ins_pipe( pipe_slow ); 4124 %} 4125 4126 instruct Repl8S_evex(vecX dst, rRegI src) %{ 4127 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 4128 match(Set dst (ReplicateS src)); 4129 format %{ "vpbroadcastw $dst,$src\t! replicate8S" %} 4130 ins_encode %{ 4131 int vector_len = 0; 4132 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4133 %} 4134 ins_pipe( pipe_slow ); 4135 %} 4136 4137 instruct Repl8S_mem_evex(vecX dst, memory mem) %{ 4138 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 4139 match(Set dst (ReplicateS (LoadS mem))); 4140 format %{ "vpbroadcastw $dst,$mem\t! replicate8S" %} 4141 ins_encode %{ 4142 int vector_len = 0; 4143 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4144 %} 4145 ins_pipe( pipe_slow ); 4146 %} 4147 4148 instruct Repl16S_evex(vecY dst, rRegI src) %{ 4149 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4150 match(Set dst (ReplicateS src)); 4151 format %{ "vpbroadcastw $dst,$src\t! replicate16S" %} 4152 ins_encode %{ 4153 int vector_len = 1; 4154 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4155 %} 4156 ins_pipe( pipe_slow ); 4157 %} 4158 4159 instruct Repl16S_mem_evex(vecY dst, memory mem) %{ 4160 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4161 match(Set dst (ReplicateS (LoadS mem))); 4162 format %{ "vpbroadcastw $dst,$mem\t! 
replicate16S" %} 4163 ins_encode %{ 4164 int vector_len = 1; 4165 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4166 %} 4167 ins_pipe( pipe_slow ); 4168 %} 4169 4170 instruct Repl32S_evex(vecZ dst, rRegI src) %{ 4171 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw()); 4172 match(Set dst (ReplicateS src)); 4173 format %{ "vpbroadcastw $dst,$src\t! replicate32S" %} 4174 ins_encode %{ 4175 int vector_len = 2; 4176 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4177 %} 4178 ins_pipe( pipe_slow ); 4179 %} 4180 4181 instruct Repl32S_mem_evex(vecZ dst, memory mem) %{ 4182 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw()); 4183 match(Set dst (ReplicateS (LoadS mem))); 4184 format %{ "vpbroadcastw $dst,$mem\t! replicate32S" %} 4185 ins_encode %{ 4186 int vector_len = 2; 4187 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4188 %} 4189 ins_pipe( pipe_slow ); 4190 %} 4191 4192 instruct Repl8S_imm_evex(vecX dst, immI con) %{ 4193 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 4194 match(Set dst (ReplicateS con)); 4195 format %{ "movq $dst,[$constantaddress]\n\t" 4196 "vpbroadcastw $dst,$dst\t! replicate8S" %} 4197 ins_encode %{ 4198 int vector_len = 0; 4199 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4200 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4201 %} 4202 ins_pipe( pipe_slow ); 4203 %} 4204 4205 instruct Repl16S_imm_evex(vecY dst, immI con) %{ 4206 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4207 match(Set dst (ReplicateS con)); 4208 format %{ "movq $dst,[$constantaddress]\n\t" 4209 "vpbroadcastw $dst,$dst\t! replicate16S" %} 4210 ins_encode %{ 4211 int vector_len = 1; 4212 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4213 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4214 %} 4215 ins_pipe( pipe_slow ); 4216 %} 4217 4218 instruct Repl32S_imm_evex(vecZ dst, immI con) %{ 4219 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw()); 4220 match(Set dst (ReplicateS con)); 4221 format %{ "movq $dst,[$constantaddress]\n\t" 4222 "vpbroadcastw $dst,$dst\t! replicate32S" %} 4223 ins_encode %{ 4224 int vector_len = 2; 4225 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4226 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4227 %} 4228 ins_pipe( pipe_slow ); 4229 %} 4230 4231 instruct Repl32S_zero_evex(vecZ dst, immI0 zero) %{ 4232 predicate(n->as_Vector()->length() == 32 && UseAVX > 2); 4233 match(Set dst (ReplicateS zero)); 4234 format %{ "vpxor $dst k0,$dst,$dst\t! replicate32S zero" %} 4235 ins_encode %{ 4236 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 4237 int vector_len = 2; 4238 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4239 %} 4240 ins_pipe( fpu_reg_reg ); 4241 %} 4242 4243 instruct Repl4I_evex(vecX dst, rRegI src) %{ 4244 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4245 match(Set dst (ReplicateI src)); 4246 format %{ "vpbroadcastd $dst,$src\t! 
replicate4I" %} 4247 ins_encode %{ 4248 int vector_len = 0; 4249 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4250 %} 4251 ins_pipe( pipe_slow ); 4252 %} 4253 4254 instruct Repl4I_mem_evex(vecX dst, memory mem) %{ 4255 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4256 match(Set dst (ReplicateI (LoadI mem))); 4257 format %{ "vpbroadcastd $dst,$mem\t! replicate4I" %} 4258 ins_encode %{ 4259 int vector_len = 0; 4260 __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4261 %} 4262 ins_pipe( pipe_slow ); 4263 %} 4264 4265 instruct Repl8I_evex(vecY dst, rRegI src) %{ 4266 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4267 match(Set dst (ReplicateI src)); 4268 format %{ "vpbroadcastd $dst,$src\t! replicate8I" %} 4269 ins_encode %{ 4270 int vector_len = 1; 4271 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4272 %} 4273 ins_pipe( pipe_slow ); 4274 %} 4275 4276 instruct Repl8I_mem_evex(vecY dst, memory mem) %{ 4277 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4278 match(Set dst (ReplicateI (LoadI mem))); 4279 format %{ "vpbroadcastd $dst,$mem\t! replicate8I" %} 4280 ins_encode %{ 4281 int vector_len = 1; 4282 __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4283 %} 4284 ins_pipe( pipe_slow ); 4285 %} 4286 4287 instruct Repl16I_evex(vecZ dst, rRegI src) %{ 4288 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4289 match(Set dst (ReplicateI src)); 4290 format %{ "vpbroadcastd $dst,$src\t! replicate16I" %} 4291 ins_encode %{ 4292 int vector_len = 2; 4293 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4294 %} 4295 ins_pipe( pipe_slow ); 4296 %} 4297 4298 instruct Repl16I_mem_evex(vecZ dst, memory mem) %{ 4299 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4300 match(Set dst (ReplicateI (LoadI mem))); 4301 format %{ "vpbroadcastd $dst,$mem\t! replicate16I" %} 4302 ins_encode %{ 4303 int vector_len = 2; 4304 __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4305 %} 4306 ins_pipe( pipe_slow ); 4307 %} 4308 4309 instruct Repl4I_imm_evex(vecX dst, immI con) %{ 4310 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4311 match(Set dst (ReplicateI con)); 4312 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 4313 "vpbroadcastd $dst,$dst\t! replicate4I" %} 4314 ins_encode %{ 4315 int vector_len = 0; 4316 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4317 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4318 %} 4319 ins_pipe( pipe_slow ); 4320 %} 4321 4322 instruct Repl8I_imm_evex(vecY dst, immI con) %{ 4323 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4324 match(Set dst (ReplicateI con)); 4325 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 4326 "vpbroadcastd $dst,$dst\t! replicate8I" %} 4327 ins_encode %{ 4328 int vector_len = 1; 4329 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4330 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4331 %} 4332 ins_pipe( pipe_slow ); 4333 %} 4334 4335 instruct Repl16I_imm_evex(vecZ dst, immI con) %{ 4336 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4337 match(Set dst (ReplicateI con)); 4338 format %{ "movq $dst,[$constantaddress]\t! replicate16I($con)\n\t" 4339 "vpbroadcastd $dst,$dst\t! 
replicate16I" %} 4340 ins_encode %{ 4341 int vector_len = 2; 4342 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4343 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4344 %} 4345 ins_pipe( pipe_slow ); 4346 %} 4347 4348 instruct Repl16I_zero_evex(vecZ dst, immI0 zero) %{ 4349 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4350 match(Set dst (ReplicateI zero)); 4351 format %{ "vpxor $dst k0,$dst,$dst\t! replicate16I zero" %} 4352 ins_encode %{ 4353 // Use vxorpd since AVX does not have vpxor for 512-bit (AVX2 will have it). 4354 int vector_len = 2; 4355 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4356 %} 4357 ins_pipe( fpu_reg_reg ); 4358 %} 4359 4360 // Replicate long (8 byte) scalar to be vector 4361 #ifdef _LP64 4362 instruct Repl4L_evex(vecY dst, rRegL src) %{ 4363 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4364 match(Set dst (ReplicateL src)); 4365 format %{ "vpbroadcastq $dst,$src\t! replicate4L" %} 4366 ins_encode %{ 4367 int vector_len = 1; 4368 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); 4369 %} 4370 ins_pipe( pipe_slow ); 4371 %} 4372 4373 instruct Repl8L_evex(vecZ dst, rRegL src) %{ 4374 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4375 match(Set dst (ReplicateL src)); 4376 format %{ "vpbroadcastq $dst,$src\t! replicate8L" %} 4377 ins_encode %{ 4378 int vector_len = 2; 4379 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); 4380 %} 4381 ins_pipe( pipe_slow ); 4382 %} 4383 #else // _LP64 4384 instruct Repl4L_evex(vecY dst, eRegL src, regD tmp) %{ 4385 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4386 match(Set dst (ReplicateL src)); 4387 effect(TEMP dst, USE src, TEMP tmp); 4388 format %{ "movdl $dst,$src.lo\n\t" 4389 "movdl $tmp,$src.hi\n\t" 4390 "punpckldq $dst,$tmp\n\t" 4391 "vpbroadcastq $dst,$dst\t! replicate4L" %} 4392 ins_encode %{ 4393 int vector_len = 1; 4394 __ movdl($dst$$XMMRegister, $src$$Register); 4395 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4396 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4397 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4398 %} 4399 ins_pipe( pipe_slow ); 4400 %} 4401 4402 instruct Repl8L_evex(vecZ dst, eRegL src, regD tmp) %{ 4403 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4404 match(Set dst (ReplicateL src)); 4405 effect(TEMP dst, USE src, TEMP tmp); 4406 format %{ "movdl $dst,$src.lo\n\t" 4407 "movdl $tmp,$src.hi\n\t" 4408 "punpckldq $dst,$tmp\n\t" 4409 "vpbroadcastq $dst,$dst\t! replicate8L" %} 4410 ins_encode %{ 4411 int vector_len = 2; 4412 __ movdl($dst$$XMMRegister, $src$$Register); 4413 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4414 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4415 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4416 %} 4417 ins_pipe( pipe_slow ); 4418 %} 4419 #endif // _LP64 4420 4421 instruct Repl4L_imm_evex(vecY dst, immL con) %{ 4422 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4423 match(Set dst (ReplicateL con)); 4424 format %{ "movq $dst,[$constantaddress]\n\t" 4425 "vpbroadcastq $dst,$dst\t! 
replicate4L" %} 4426 ins_encode %{ 4427 int vector_len = 1; 4428 __ movq($dst$$XMMRegister, $constantaddress($con)); 4429 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4430 %} 4431 ins_pipe( pipe_slow ); 4432 %} 4433 4434 instruct Repl8L_imm_evex(vecZ dst, immL con) %{ 4435 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4436 match(Set dst (ReplicateL con)); 4437 format %{ "movq $dst,[$constantaddress]\n\t" 4438 "vpbroadcastq $dst,$dst\t! replicate8L" %} 4439 ins_encode %{ 4440 int vector_len = 2; 4441 __ movq($dst$$XMMRegister, $constantaddress($con)); 4442 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4443 %} 4444 ins_pipe( pipe_slow ); 4445 %} 4446 4447 instruct Repl2L_mem_evex(vecX dst, memory mem) %{ 4448 predicate(n->as_Vector()->length() == 2 && VM_Version::supports_avx512vl()); 4449 match(Set dst (ReplicateL (LoadL mem))); 4450 format %{ "vpbroadcastd $dst,$mem\t! replicate2L" %} 4451 ins_encode %{ 4452 int vector_len = 0; 4453 __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4454 %} 4455 ins_pipe( pipe_slow ); 4456 %} 4457 4458 instruct Repl4L_mem_evex(vecY dst, memory mem) %{ 4459 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4460 match(Set dst (ReplicateL (LoadL mem))); 4461 format %{ "vpbroadcastd $dst,$mem\t! replicate4L" %} 4462 ins_encode %{ 4463 int vector_len = 1; 4464 __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4465 %} 4466 ins_pipe( pipe_slow ); 4467 %} 4468 4469 instruct Repl8L_mem_evex(vecZ dst, memory mem) %{ 4470 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4471 match(Set dst (ReplicateL (LoadL mem))); 4472 format %{ "vpbroadcastd $dst,$mem\t! replicate8L" %} 4473 ins_encode %{ 4474 int vector_len = 2; 4475 __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4476 %} 4477 ins_pipe( pipe_slow ); 4478 %} 4479 4480 instruct Repl8L_zero_evex(vecZ dst, immL0 zero) %{ 4481 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4482 match(Set dst (ReplicateL zero)); 4483 format %{ "vpxor $dst k0,$dst,$dst\t! replicate8L zero" %} 4484 ins_encode %{ 4485 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 4486 int vector_len = 2; 4487 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4488 %} 4489 ins_pipe( fpu_reg_reg ); 4490 %} 4491 4492 instruct Repl8F_evex(vecY dst, regF src) %{ 4493 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4494 match(Set dst (ReplicateF src)); 4495 format %{ "vbroadcastss $dst,$src\t! replicate8F" %} 4496 ins_encode %{ 4497 int vector_len = 1; 4498 __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4499 %} 4500 ins_pipe( pipe_slow ); 4501 %} 4502 4503 instruct Repl8F_mem_evex(vecY dst, memory mem) %{ 4504 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4505 match(Set dst (ReplicateF (LoadF mem))); 4506 format %{ "vbroadcastss $dst,$mem\t! replicate8F" %} 4507 ins_encode %{ 4508 int vector_len = 1; 4509 __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); 4510 %} 4511 ins_pipe( pipe_slow ); 4512 %} 4513 4514 instruct Repl16F_evex(vecZ dst, regF src) %{ 4515 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4516 match(Set dst (ReplicateF src)); 4517 format %{ "vbroadcastss $dst,$src\t! 
replicate16F" %} 4518 ins_encode %{ 4519 int vector_len = 2; 4520 __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4521 %} 4522 ins_pipe( pipe_slow ); 4523 %} 4524 4525 instruct Repl16F_mem_evex(vecZ dst, memory mem) %{ 4526 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4527 match(Set dst (ReplicateF (LoadF mem))); 4528 format %{ "vbroadcastss $dst,$mem\t! replicate16F" %} 4529 ins_encode %{ 4530 int vector_len = 2; 4531 __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); 4532 %} 4533 ins_pipe( pipe_slow ); 4534 %} 4535 4536 instruct Repl2F_zero_evex(vecD dst, immF0 zero) %{ 4537 predicate(n->as_Vector()->length() == 2 && UseAVX > 2); 4538 match(Set dst (ReplicateF zero)); 4539 format %{ "vpxor $dst k0,$dst,$dst\t! replicate2F zero" %} 4540 ins_encode %{ 4541 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4542 int vector_len = 2; 4543 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4544 %} 4545 ins_pipe( fpu_reg_reg ); 4546 %} 4547 4548 instruct Repl4F_zero_evex(vecX dst, immF0 zero) %{ 4549 predicate(n->as_Vector()->length() == 4 && UseAVX > 2); 4550 match(Set dst (ReplicateF zero)); 4551 format %{ "vpxor $dst k0,$dst,$dst\t! replicate4F zero" %} 4552 ins_encode %{ 4553 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4554 int vector_len = 2; 4555 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4556 %} 4557 ins_pipe( fpu_reg_reg ); 4558 %} 4559 4560 instruct Repl8F_zero_evex(vecY dst, immF0 zero) %{ 4561 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4562 match(Set dst (ReplicateF zero)); 4563 format %{ "vpxor $dst k0,$dst,$dst\t! replicate8F zero" %} 4564 ins_encode %{ 4565 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4566 int vector_len = 2; 4567 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4568 %} 4569 ins_pipe( fpu_reg_reg ); 4570 %} 4571 4572 instruct Repl16F_zero_evex(vecZ dst, immF0 zero) %{ 4573 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4574 match(Set dst (ReplicateF zero)); 4575 format %{ "vpxor $dst k0,$dst,$dst\t! replicate16F zero" %} 4576 ins_encode %{ 4577 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4578 int vector_len = 2; 4579 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4580 %} 4581 ins_pipe( fpu_reg_reg ); 4582 %} 4583 4584 instruct Repl4D_evex(vecY dst, regD src) %{ 4585 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4586 match(Set dst (ReplicateD src)); 4587 format %{ "vbroadcastsd $dst,$src\t! replicate4D" %} 4588 ins_encode %{ 4589 int vector_len = 1; 4590 __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4591 %} 4592 ins_pipe( pipe_slow ); 4593 %} 4594 4595 instruct Repl4D_mem_evex(vecY dst, memory mem) %{ 4596 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4597 match(Set dst (ReplicateD (LoadD mem))); 4598 format %{ "vbroadcastsd $dst,$mem\t! 
replicate4D" %} 4599 ins_encode %{ 4600 int vector_len = 1; 4601 __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); 4602 %} 4603 ins_pipe( pipe_slow ); 4604 %} 4605 4606 instruct Repl8D_evex(vecZ dst, regD src) %{ 4607 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4608 match(Set dst (ReplicateD src)); 4609 format %{ "vbroadcastsd $dst,$src\t! replicate8D" %} 4610 ins_encode %{ 4611 int vector_len = 2; 4612 __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4613 %} 4614 ins_pipe( pipe_slow ); 4615 %} 4616 4617 instruct Repl8D_mem_evex(vecZ dst, memory mem) %{ 4618 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4619 match(Set dst (ReplicateD (LoadD mem))); 4620 format %{ "vbroadcastsd $dst,$mem\t! replicate8D" %} 4621 ins_encode %{ 4622 int vector_len = 2; 4623 __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); 4624 %} 4625 ins_pipe( pipe_slow ); 4626 %} 4627 4628 instruct Repl2D_zero_evex(vecX dst, immD0 zero) %{ 4629 predicate(n->as_Vector()->length() == 2 && UseAVX > 2); 4630 match(Set dst (ReplicateD zero)); 4631 format %{ "vpxor $dst k0,$dst,$dst\t! replicate2D zero" %} 4632 ins_encode %{ 4633 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4634 int vector_len = 2; 4635 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4636 %} 4637 ins_pipe( fpu_reg_reg ); 4638 %} 4639 4640 instruct Repl4D_zero_evex(vecY dst, immD0 zero) %{ 4641 predicate(n->as_Vector()->length() == 4 && UseAVX > 2); 4642 match(Set dst (ReplicateD zero)); 4643 format %{ "vpxor $dst k0,$dst,$dst\t! replicate4D zero" %} 4644 ins_encode %{ 4645 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4646 int vector_len = 2; 4647 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4648 %} 4649 ins_pipe( fpu_reg_reg ); 4650 %} 4651 4652 instruct Repl8D_zero_evex(vecZ dst, immD0 zero) %{ 4653 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4654 match(Set dst (ReplicateD zero)); 4655 format %{ "vpxor $dst k0,$dst,$dst,vect512\t! replicate8D zero" %} 4656 ins_encode %{ 4657 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4658 int vector_len = 2; 4659 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4660 %} 4661 ins_pipe( fpu_reg_reg ); 4662 %} 4663 4664 // ====================REDUCTION ARITHMETIC======================================= 4665 4666 instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4667 predicate(UseSSE > 2 && UseAVX == 0); 4668 match(Set dst (AddReductionVI src1 src2)); 4669 effect(TEMP tmp2, TEMP tmp); 4670 format %{ "movdqu $tmp2,$src2\n\t" 4671 "phaddd $tmp2,$tmp2\n\t" 4672 "movd $tmp,$src1\n\t" 4673 "paddd $tmp,$tmp2\n\t" 4674 "movd $dst,$tmp\t! 
add reduction2I" %} 4675 ins_encode %{ 4676 __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister); 4677 __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); 4678 __ movdl($tmp$$XMMRegister, $src1$$Register); 4679 __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister); 4680 __ movdl($dst$$Register, $tmp$$XMMRegister); 4681 %} 4682 ins_pipe( pipe_slow ); 4683 %} 4684 4685 instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4686 predicate(VM_Version::supports_avxonly()); 4687 match(Set dst (AddReductionVI src1 src2)); 4688 effect(TEMP tmp, TEMP tmp2); 4689 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4690 "movd $tmp2,$src1\n\t" 4691 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4692 "movd $dst,$tmp2\t! add reduction2I" %} 4693 ins_encode %{ 4694 int vector_len = 0; 4695 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4696 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4697 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 4698 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4699 %} 4700 ins_pipe( pipe_slow ); 4701 %} 4702 4703 instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4704 predicate(UseAVX > 2); 4705 match(Set dst (AddReductionVI src1 src2)); 4706 effect(TEMP tmp, TEMP tmp2); 4707 format %{ "pshufd $tmp2,$src2,0x1\n\t" 4708 "vpaddd $tmp,$src2,$tmp2\n\t" 4709 "movd $tmp2,$src1\n\t" 4710 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4711 "movd $dst,$tmp2\t! add reduction2I" %} 4712 ins_encode %{ 4713 int vector_len = 0; 4714 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 4715 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4716 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4717 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4718 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4719 %} 4720 ins_pipe( pipe_slow ); 4721 %} 4722 4723 instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4724 predicate(UseSSE > 2 && UseAVX == 0); 4725 match(Set dst (AddReductionVI src1 src2)); 4726 effect(TEMP tmp, TEMP tmp2); 4727 format %{ "movdqu $tmp,$src2\n\t" 4728 "phaddd $tmp,$tmp\n\t" 4729 "phaddd $tmp,$tmp\n\t" 4730 "movd $tmp2,$src1\n\t" 4731 "paddd $tmp2,$tmp\n\t" 4732 "movd $dst,$tmp2\t! add reduction4I" %} 4733 ins_encode %{ 4734 __ movdqu($tmp$$XMMRegister, $src2$$XMMRegister); 4735 __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); 4736 __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); 4737 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4738 __ paddd($tmp2$$XMMRegister, $tmp$$XMMRegister); 4739 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4740 %} 4741 ins_pipe( pipe_slow ); 4742 %} 4743 4744 instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4745 predicate(VM_Version::supports_avxonly()); 4746 match(Set dst (AddReductionVI src1 src2)); 4747 effect(TEMP tmp, TEMP tmp2); 4748 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4749 "vphaddd $tmp,$tmp,$tmp\n\t" 4750 "movd $tmp2,$src1\n\t" 4751 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4752 "movd $dst,$tmp2\t! 
add reduction4I" %} 4753 ins_encode %{ 4754 int vector_len = 0; 4755 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4756 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 4757 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4758 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 4759 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4760 %} 4761 ins_pipe( pipe_slow ); 4762 %} 4763 4764 instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4765 predicate(UseAVX > 2); 4766 match(Set dst (AddReductionVI src1 src2)); 4767 effect(TEMP tmp, TEMP tmp2); 4768 format %{ "pshufd $tmp2,$src2,0xE\n\t" 4769 "vpaddd $tmp,$src2,$tmp2\n\t" 4770 "pshufd $tmp2,$tmp,0x1\n\t" 4771 "vpaddd $tmp,$tmp,$tmp2\n\t" 4772 "movd $tmp2,$src1\n\t" 4773 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4774 "movd $dst,$tmp2\t! add reduction4I" %} 4775 ins_encode %{ 4776 int vector_len = 0; 4777 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 4778 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4779 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4780 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4781 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4782 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4783 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4784 %} 4785 ins_pipe( pipe_slow ); 4786 %} 4787 4788 instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 4789 predicate(VM_Version::supports_avxonly()); 4790 match(Set dst (AddReductionVI src1 src2)); 4791 effect(TEMP tmp, TEMP tmp2); 4792 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4793 "vphaddd $tmp,$tmp,$tmp2\n\t" 4794 "vextracti128 $tmp2,$tmp\n\t" 4795 "vpaddd $tmp,$tmp,$tmp2\n\t" 4796 "movd $tmp2,$src1\n\t" 4797 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4798 "movd $dst,$tmp2\t! add reduction8I" %} 4799 ins_encode %{ 4800 int vector_len = 1; 4801 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4802 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4803 __ vextracti128h($tmp2$$XMMRegister, $tmp$$XMMRegister); 4804 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4805 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4806 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4807 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4808 %} 4809 ins_pipe( pipe_slow ); 4810 %} 4811 4812 instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 4813 predicate(UseAVX > 2); 4814 match(Set dst (AddReductionVI src1 src2)); 4815 effect(TEMP tmp, TEMP tmp2); 4816 format %{ "vextracti128 $tmp,$src2\n\t" 4817 "vpaddd $tmp,$tmp,$src2\n\t" 4818 "pshufd $tmp2,$tmp,0xE\n\t" 4819 "vpaddd $tmp,$tmp,$tmp2\n\t" 4820 "pshufd $tmp2,$tmp,0x1\n\t" 4821 "vpaddd $tmp,$tmp,$tmp2\n\t" 4822 "movd $tmp2,$src1\n\t" 4823 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4824 "movd $dst,$tmp2\t! 
add reduction8I" %} 4825 ins_encode %{ 4826 int vector_len = 0; 4827 __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister); 4828 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 4829 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 4830 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4831 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4832 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4833 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4834 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4835 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4836 %} 4837 ins_pipe( pipe_slow ); 4838 %} 4839 4840 instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 4841 predicate(UseAVX > 2); 4842 match(Set dst (AddReductionVI src1 src2)); 4843 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 4844 format %{ "vextracti64x4 $tmp3,$src2,0x1\n\t" 4845 "vpaddd $tmp3,$tmp3,$src2\n\t" 4846 "vextracti128 $tmp,$tmp3\n\t" 4847 "vpaddd $tmp,$tmp,$tmp3\n\t" 4848 "pshufd $tmp2,$tmp,0xE\n\t" 4849 "vpaddd $tmp,$tmp,$tmp2\n\t" 4850 "pshufd $tmp2,$tmp,0x1\n\t" 4851 "vpaddd $tmp,$tmp,$tmp2\n\t" 4852 "movd $tmp2,$src1\n\t" 4853 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4854 "movd $dst,$tmp2\t! mul reduction16I" %} 4855 ins_encode %{ 4856 __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 1); 4857 __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 4858 __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister); 4859 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 4860 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 4861 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4862 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4863 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4864 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4865 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4866 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4867 %} 4868 ins_pipe( pipe_slow ); 4869 %} 4870 4871 #ifdef _LP64 4872 instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ 4873 predicate(UseAVX > 2); 4874 match(Set dst (AddReductionVL src1 src2)); 4875 effect(TEMP tmp, TEMP tmp2); 4876 format %{ "pshufd $tmp2,$src2,0xE\n\t" 4877 "vpaddq $tmp,$src2,$tmp2\n\t" 4878 "movdq $tmp2,$src1\n\t" 4879 "vpaddq $tmp2,$tmp,$tmp2\n\t" 4880 "movdq $dst,$tmp2\t! add reduction2L" %} 4881 ins_encode %{ 4882 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 4883 __ vpaddq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 4884 __ movdq($tmp2$$XMMRegister, $src1$$Register); 4885 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4886 __ movdq($dst$$Register, $tmp2$$XMMRegister); 4887 %} 4888 ins_pipe( pipe_slow ); 4889 %} 4890 4891 instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ 4892 predicate(UseAVX > 2); 4893 match(Set dst (AddReductionVL src1 src2)); 4894 effect(TEMP tmp, TEMP tmp2); 4895 format %{ "vextracti128 $tmp,$src2\n\t" 4896 "vpaddq $tmp2,$tmp,$src2\n\t" 4897 "pshufd $tmp,$tmp2,0xE\n\t" 4898 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4899 "movdq $tmp,$src1\n\t" 4900 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4901 "movdq $dst,$tmp2\t! 
add reduction4L" %} 4902 ins_encode %{ 4903 __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister); 4904 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 4905 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 4906 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4907 __ movdq($tmp$$XMMRegister, $src1$$Register); 4908 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4909 __ movdq($dst$$Register, $tmp2$$XMMRegister); 4910 %} 4911 ins_pipe( pipe_slow ); 4912 %} 4913 4914 instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ 4915 predicate(UseAVX > 2); 4916 match(Set dst (AddReductionVL src1 src2)); 4917 effect(TEMP tmp, TEMP tmp2); 4918 format %{ "vextracti64x4 $tmp2,$src2,0x1\n\t" 4919 "vpaddq $tmp2,$tmp2,$src2\n\t" 4920 "vextracti128 $tmp,$tmp2\n\t" 4921 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4922 "pshufd $tmp,$tmp2,0xE\n\t" 4923 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4924 "movdq $tmp,$src1\n\t" 4925 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4926 "movdq $dst,$tmp2\t! add reduction8L" %} 4927 ins_encode %{ 4928 __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 1); 4929 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 4930 __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister); 4931 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4932 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 4933 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4934 __ movdq($tmp$$XMMRegister, $src1$$Register); 4935 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4936 __ movdq($dst$$Register, $tmp2$$XMMRegister); 4937 %} 4938 ins_pipe( pipe_slow ); 4939 %} 4940 #endif 4941 4942 instruct rsadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 4943 predicate(UseSSE >= 1 && UseAVX == 0); 4944 match(Set dst (AddReductionVF dst src2)); 4945 effect(TEMP dst, TEMP tmp); 4946 format %{ "addss $dst,$src2\n\t" 4947 "pshufd $tmp,$src2,0x01\n\t" 4948 "addss $dst,$tmp\t! add reduction2F" %} 4949 ins_encode %{ 4950 __ addss($dst$$XMMRegister, $src2$$XMMRegister); 4951 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4952 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 4953 %} 4954 ins_pipe( pipe_slow ); 4955 %} 4956 4957 instruct rvadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 4958 predicate(UseAVX > 0); 4959 match(Set dst (AddReductionVF dst src2)); 4960 effect(TEMP dst, TEMP tmp); 4961 format %{ "vaddss $dst,$dst,$src2\n\t" 4962 "pshufd $tmp,$src2,0x01\n\t" 4963 "vaddss $dst,$dst,$tmp\t! add reduction2F" %} 4964 ins_encode %{ 4965 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 4966 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4967 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4968 %} 4969 ins_pipe( pipe_slow ); 4970 %} 4971 4972 instruct rsadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 4973 predicate(UseSSE >= 1 && UseAVX == 0); 4974 match(Set dst (AddReductionVF dst src2)); 4975 effect(TEMP dst, TEMP tmp); 4976 format %{ "addss $dst,$src2\n\t" 4977 "pshufd $tmp,$src2,0x01\n\t" 4978 "addss $dst,$tmp\n\t" 4979 "pshufd $tmp,$src2,0x02\n\t" 4980 "addss $dst,$tmp\n\t" 4981 "pshufd $tmp,$src2,0x03\n\t" 4982 "addss $dst,$tmp\t! 
add reduction4F" %} 4983 ins_encode %{ 4984 __ addss($dst$$XMMRegister, $src2$$XMMRegister); 4985 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4986 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 4987 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 4988 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 4989 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 4990 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 4991 %} 4992 ins_pipe( pipe_slow ); 4993 %} 4994 4995 instruct rvadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 4996 predicate(UseAVX > 0); 4997 match(Set dst (AddReductionVF dst src2)); 4998 effect(TEMP tmp, TEMP dst); 4999 format %{ "vaddss $dst,dst,$src2\n\t" 5000 "pshufd $tmp,$src2,0x01\n\t" 5001 "vaddss $dst,$dst,$tmp\n\t" 5002 "pshufd $tmp,$src2,0x02\n\t" 5003 "vaddss $dst,$dst,$tmp\n\t" 5004 "pshufd $tmp,$src2,0x03\n\t" 5005 "vaddss $dst,$dst,$tmp\t! add reduction4F" %} 5006 ins_encode %{ 5007 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5008 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5009 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5010 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5011 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5012 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5013 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5014 %} 5015 ins_pipe( pipe_slow ); 5016 %} 5017 5018 instruct radd8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{ 5019 predicate(UseAVX > 0); 5020 match(Set dst (AddReductionVF dst src2)); 5021 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5022 format %{ "vaddss $dst,$dst,$src2\n\t" 5023 "pshufd $tmp,$src2,0x01\n\t" 5024 "vaddss $dst,$dst,$tmp\n\t" 5025 "pshufd $tmp,$src2,0x02\n\t" 5026 "vaddss $dst,$dst,$tmp\n\t" 5027 "pshufd $tmp,$src2,0x03\n\t" 5028 "vaddss $dst,$dst,$tmp\n\t" 5029 "vextractf128 $tmp2,$src2\n\t" 5030 "vaddss $dst,$dst,$tmp2\n\t" 5031 "pshufd $tmp,$tmp2,0x01\n\t" 5032 "vaddss $dst,$dst,$tmp\n\t" 5033 "pshufd $tmp,$tmp2,0x02\n\t" 5034 "vaddss $dst,$dst,$tmp\n\t" 5035 "pshufd $tmp,$tmp2,0x03\n\t" 5036 "vaddss $dst,$dst,$tmp\t! 
add reduction8F" %} 5037 ins_encode %{ 5038 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5039 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5040 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5041 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5042 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5043 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5044 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5045 __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister); 5046 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5047 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5048 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5049 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5050 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5051 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5052 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5053 %} 5054 ins_pipe( pipe_slow ); 5055 %} 5056 5057 instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{ 5058 predicate(UseAVX > 2); 5059 match(Set dst (AddReductionVF dst src2)); 5060 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5061 format %{ "vaddss $dst,$dst,$src2\n\t" 5062 "pshufd $tmp,$src2,0x01\n\t" 5063 "vaddss $dst,$dst,$tmp\n\t" 5064 "pshufd $tmp,$src2,0x02\n\t" 5065 "vaddss $dst,$dst,$tmp\n\t" 5066 "pshufd $tmp,$src2,0x03\n\t" 5067 "vaddss $dst,$dst,$tmp\n\t" 5068 "vextractf32x4 $tmp2,$src2, 0x1\n\t" 5069 "vaddss $dst,$dst,$tmp2\n\t" 5070 "pshufd $tmp,$tmp2,0x01\n\t" 5071 "vaddss $dst,$dst,$tmp\n\t" 5072 "pshufd $tmp,$tmp2,0x02\n\t" 5073 "vaddss $dst,$dst,$tmp\n\t" 5074 "pshufd $tmp,$tmp2,0x03\n\t" 5075 "vaddss $dst,$dst,$tmp\n\t" 5076 "vextractf32x4 $tmp2,$src2, 0x2\n\t" 5077 "vaddss $dst,$dst,$tmp2\n\t" 5078 "pshufd $tmp,$tmp2,0x01\n\t" 5079 "vaddss $dst,$dst,$tmp\n\t" 5080 "pshufd $tmp,$tmp2,0x02\n\t" 5081 "vaddss $dst,$dst,$tmp\n\t" 5082 "pshufd $tmp,$tmp2,0x03\n\t" 5083 "vaddss $dst,$dst,$tmp\n\t" 5084 "vextractf32x4 $tmp2,$src2, 0x3\n\t" 5085 "vaddss $dst,$dst,$tmp2\n\t" 5086 "pshufd $tmp,$tmp2,0x01\n\t" 5087 "vaddss $dst,$dst,$tmp\n\t" 5088 "pshufd $tmp,$tmp2,0x02\n\t" 5089 "vaddss $dst,$dst,$tmp\n\t" 5090 "pshufd $tmp,$tmp2,0x03\n\t" 5091 "vaddss $dst,$dst,$tmp\t! 
add reduction16F" %} 5092 ins_encode %{ 5093 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5094 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5095 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5096 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5097 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5098 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5099 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5100 __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5101 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5102 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5103 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5104 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5105 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5106 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5107 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5108 __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 5109 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5110 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5111 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5112 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5113 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5114 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5115 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5116 __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 5117 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5118 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5119 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5120 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5121 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5122 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5123 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5124 %} 5125 ins_pipe( pipe_slow ); 5126 %} 5127 5128 instruct rsadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 5129 predicate(UseSSE >= 1 && UseAVX == 0); 5130 match(Set dst (AddReductionVD dst src2)); 5131 effect(TEMP tmp, TEMP dst); 5132 format %{ "addsd $dst,$src2\n\t" 5133 "pshufd $tmp,$src2,0xE\n\t" 5134 "addsd $dst,$tmp\t! add reduction2D" %} 5135 ins_encode %{ 5136 __ addsd($dst$$XMMRegister, $src2$$XMMRegister); 5137 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5138 __ addsd($dst$$XMMRegister, $tmp$$XMMRegister); 5139 %} 5140 ins_pipe( pipe_slow ); 5141 %} 5142 5143 instruct rvadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 5144 predicate(UseAVX > 0); 5145 match(Set dst (AddReductionVD dst src2)); 5146 effect(TEMP tmp, TEMP dst); 5147 format %{ "vaddsd $dst,$dst,$src2\n\t" 5148 "pshufd $tmp,$src2,0xE\n\t" 5149 "vaddsd $dst,$dst,$tmp\t! 
add reduction2D" %} 5150 ins_encode %{ 5151 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5152 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5153 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5154 %} 5155 ins_pipe( pipe_slow ); 5156 %} 5157 5158 instruct rvadd4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{ 5159 predicate(UseAVX > 0); 5160 match(Set dst (AddReductionVD dst src2)); 5161 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5162 format %{ "vaddsd $dst,$dst,$src2\n\t" 5163 "pshufd $tmp,$src2,0xE\n\t" 5164 "vaddsd $dst,$dst,$tmp\n\t" 5165 "vextractf32x4h $tmp2,$src2, 0x1\n\t" 5166 "vaddsd $dst,$dst,$tmp2\n\t" 5167 "pshufd $tmp,$tmp2,0xE\n\t" 5168 "vaddsd $dst,$dst,$tmp\t! add reduction4D" %} 5169 ins_encode %{ 5170 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5171 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5172 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5173 __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5174 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5175 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5176 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5177 %} 5178 ins_pipe( pipe_slow ); 5179 %} 5180 5181 instruct rvadd8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{ 5182 predicate(UseAVX > 2); 5183 match(Set dst (AddReductionVD dst src2)); 5184 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5185 format %{ "vaddsd $dst,$dst,$src2\n\t" 5186 "pshufd $tmp,$src2,0xE\n\t" 5187 "vaddsd $dst,$dst,$tmp\n\t" 5188 "vextractf32x4 $tmp2,$src2, 0x1\n\t" 5189 "vaddsd $dst,$dst,$tmp2\n\t" 5190 "pshufd $tmp,$tmp2,0xE\n\t" 5191 "vaddsd $dst,$dst,$tmp\n\t" 5192 "vextractf32x4 $tmp2,$src2, 0x2\n\t" 5193 "vaddsd $dst,$dst,$tmp2\n\t" 5194 "pshufd $tmp,$tmp2,0xE\n\t" 5195 "vaddsd $dst,$dst,$tmp\n\t" 5196 "vextractf32x4 $tmp2,$src2, 0x3\n\t" 5197 "vaddsd $dst,$dst,$tmp2\n\t" 5198 "pshufd $tmp,$tmp2,0xE\n\t" 5199 "vaddsd $dst,$dst,$tmp\t! add reduction8D" %} 5200 ins_encode %{ 5201 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5202 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5203 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5204 __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5205 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5206 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5207 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5208 __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 5209 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5210 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5211 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5212 __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 5213 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5214 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5215 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5216 %} 5217 ins_pipe( pipe_slow ); 5218 %} 5219 5220 instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 5221 predicate(UseSSE > 3 && UseAVX == 0); 5222 match(Set dst (MulReductionVI src1 src2)); 5223 effect(TEMP tmp, TEMP tmp2); 5224 format %{ "pshufd $tmp2,$src2,0x1\n\t" 5225 "pmulld $tmp2,$src2\n\t" 5226 "movd $tmp,$src1\n\t" 5227 "pmulld $tmp2,$tmp\n\t" 5228 "movd $dst,$tmp2\t! 
mul reduction2I" %} 5229 ins_encode %{ 5230 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5231 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 5232 __ movdl($tmp$$XMMRegister, $src1$$Register); 5233 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5234 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5235 %} 5236 ins_pipe( pipe_slow ); 5237 %} 5238 5239 instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 5240 predicate(UseAVX > 0); 5241 match(Set dst (MulReductionVI src1 src2)); 5242 effect(TEMP tmp, TEMP tmp2); 5243 format %{ "pshufd $tmp2,$src2,0x1\n\t" 5244 "vpmulld $tmp,$src2,$tmp2\n\t" 5245 "movd $tmp2,$src1\n\t" 5246 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5247 "movd $dst,$tmp2\t! mul reduction2I" %} 5248 ins_encode %{ 5249 int vector_len = 0; 5250 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5251 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5252 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5253 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5254 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5255 %} 5256 ins_pipe( pipe_slow ); 5257 %} 5258 5259 instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 5260 predicate(UseSSE > 3 && UseAVX == 0); 5261 match(Set dst (MulReductionVI src1 src2)); 5262 effect(TEMP tmp, TEMP tmp2); 5263 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5264 "pmulld $tmp2,$src2\n\t" 5265 "pshufd $tmp,$tmp2,0x1\n\t" 5266 "pmulld $tmp2,$tmp\n\t" 5267 "movd $tmp,$src1\n\t" 5268 "pmulld $tmp2,$tmp\n\t" 5269 "movd $dst,$tmp2\t! mul reduction4I" %} 5270 ins_encode %{ 5271 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5272 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 5273 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1); 5274 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5275 __ movdl($tmp$$XMMRegister, $src1$$Register); 5276 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5277 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5278 %} 5279 ins_pipe( pipe_slow ); 5280 %} 5281 5282 instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 5283 predicate(UseAVX > 0); 5284 match(Set dst (MulReductionVI src1 src2)); 5285 effect(TEMP tmp, TEMP tmp2); 5286 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5287 "vpmulld $tmp,$src2,$tmp2\n\t" 5288 "pshufd $tmp2,$tmp,0x1\n\t" 5289 "vpmulld $tmp,$tmp,$tmp2\n\t" 5290 "movd $tmp2,$src1\n\t" 5291 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5292 "movd $dst,$tmp2\t! 
mul reduction4I" %} 5293 ins_encode %{ 5294 int vector_len = 0; 5295 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5296 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5297 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5298 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5299 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5300 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5301 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5302 %} 5303 ins_pipe( pipe_slow ); 5304 %} 5305 5306 instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 5307 predicate(UseAVX > 0); 5308 match(Set dst (MulReductionVI src1 src2)); 5309 effect(TEMP tmp, TEMP tmp2); 5310 format %{ "vextracti128 $tmp,$src2\n\t" 5311 "vpmulld $tmp,$tmp,$src2\n\t" 5312 "pshufd $tmp2,$tmp,0xE\n\t" 5313 "vpmulld $tmp,$tmp,$tmp2\n\t" 5314 "pshufd $tmp2,$tmp,0x1\n\t" 5315 "vpmulld $tmp,$tmp,$tmp2\n\t" 5316 "movd $tmp2,$src1\n\t" 5317 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5318 "movd $dst,$tmp2\t! mul reduction8I" %} 5319 ins_encode %{ 5320 int vector_len = 0; 5321 __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister); 5322 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 5323 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5324 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5325 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5326 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5327 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5328 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5329 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5330 %} 5331 ins_pipe( pipe_slow ); 5332 %} 5333 5334 instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 5335 predicate(UseAVX > 2); 5336 match(Set dst (MulReductionVI src1 src2)); 5337 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5338 format %{ "vextracti64x4 $tmp3,$src2,0x1\n\t" 5339 "vpmulld $tmp3,$tmp3,$src2\n\t" 5340 "vextracti128 $tmp,$tmp3\n\t" 5341 "vpmulld $tmp,$tmp,$src2\n\t" 5342 "pshufd $tmp2,$tmp,0xE\n\t" 5343 "vpmulld $tmp,$tmp,$tmp2\n\t" 5344 "pshufd $tmp2,$tmp,0x1\n\t" 5345 "vpmulld $tmp,$tmp,$tmp2\n\t" 5346 "movd $tmp2,$src1\n\t" 5347 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5348 "movd $dst,$tmp2\t! 
mul reduction16I" %} 5349 ins_encode %{ 5350 __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 1); 5351 __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 5352 __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister); 5353 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 5354 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5355 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5356 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5357 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5358 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5359 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5360 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5361 %} 5362 ins_pipe( pipe_slow ); 5363 %} 5364 5365 #ifdef _LP64 5366 instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ 5367 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5368 match(Set dst (MulReductionVL src1 src2)); 5369 effect(TEMP tmp, TEMP tmp2); 5370 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5371 "vpmullq $tmp,$src2,$tmp2\n\t" 5372 "movdq $tmp2,$src1\n\t" 5373 "vpmullq $tmp2,$tmp,$tmp2\n\t" 5374 "movdq $dst,$tmp2\t! mul reduction2L" %} 5375 ins_encode %{ 5376 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5377 __ vpmullq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 5378 __ movdq($tmp2$$XMMRegister, $src1$$Register); 5379 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5380 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5381 %} 5382 ins_pipe( pipe_slow ); 5383 %} 5384 5385 instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ 5386 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5387 match(Set dst (MulReductionVL src1 src2)); 5388 effect(TEMP tmp, TEMP tmp2); 5389 format %{ "vextracti128 $tmp,$src2\n\t" 5390 "vpmullq $tmp2,$tmp,$src2\n\t" 5391 "pshufd $tmp,$tmp2,0xE\n\t" 5392 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5393 "movdq $tmp,$src1\n\t" 5394 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5395 "movdq $dst,$tmp2\t! mul reduction4L" %} 5396 ins_encode %{ 5397 __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister); 5398 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 5399 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5400 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5401 __ movdq($tmp$$XMMRegister, $src1$$Register); 5402 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5403 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5404 %} 5405 ins_pipe( pipe_slow ); 5406 %} 5407 5408 instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ 5409 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5410 match(Set dst (MulReductionVL src1 src2)); 5411 effect(TEMP tmp, TEMP tmp2); 5412 format %{ "vextracti64x4 $tmp2,$src2,0x1\n\t" 5413 "vpmullq $tmp2,$tmp2,$src2\n\t" 5414 "vextracti128 $tmp,$tmp2\n\t" 5415 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5416 "pshufd $tmp,$tmp2,0xE\n\t" 5417 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5418 "movdq $tmp,$src1\n\t" 5419 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5420 "movdq $dst,$tmp2\t! 
mul reduction8L" %}
  ins_encode %{
    __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 1);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif

instruct rsmul2F_reduction(regF dst, vecD src2, regF tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "mulss $dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "mulss $dst,$tmp\t! mul reduction2F" %}
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul2F_reduction_reg(regF dst, vecD src2, regF tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vmulss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\t! mul reduction2F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "mulss $dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "mulss $dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "mulss $dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "mulss $dst,$tmp\t! mul reduction4F" %}
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vmulss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\t! mul reduction4F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "vextractf128 $tmp2,$src2\n\t"
            "vmulss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\t! mul reduction8F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2, 0x1\n\t"
            "vmulss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2, 0x2\n\t"
            "vmulss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2, 0x3\n\t"
            "vmulss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\t! mul reduction16F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "mulsd $dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "mulsd $dst,$tmp\t! mul reduction2D" %}
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vmulsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\t! mul reduction2D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf128 $tmp2,$src2\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\t! mul reduction4D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2, 0x1\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2, 0x2\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2, 0x3\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\t! mul reduction8D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
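
// Note on the FP reduction patterns above: Java requires strictly ordered
// floating-point evaluation, so a loop-carried product cannot be reassociated
// into a tree of packed multiplies. The rules therefore walk the vector one
// lane at a time with scalar mulss/mulsd, using pshufd/vextract to bring each
// lane down to element 0. A minimal Java sketch (a hypothetical example, not
// part of this file) of the kind of loop C2's SuperWord pass can reduce to a
// MulReductionVF node on this platform:
//
//   static float product(float[] a) {
//     float p = 1.0f;
//     for (int i = 0; i < a.length; i++) {
//       p *= a[i];   // loop-carried product -> MulReductionVF
//     }
//     return p;
//   }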

// ====================VECTOR ARITHMETIC=======================================

// --------------------------------- ADD --------------------------------------

// Bytes vector add
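// A hypothetical Java loop (illustrative, not from this file) of the sort
// SuperWord turns into the AddVB nodes matched below, at packed widths of
// 4/8/16/32/64 bytes depending on the vector length the loop unrolls to:
//
//   static void addBytes(byte[] a, byte[] b) {
//     for (int i = 0; i < a.length; i++) {
//       a[i] += b[i];   // element-wise byte add -> AddVB
//     }
//   }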
instruct vadd4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed4B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb $dst,$dst,$src2\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_mem_evex_special(vecS dst, vecS src, memory mem) %{
  // Predicate corrected to supports_avx512nobw(): the _special rules cover the
  // EVEX-without-AVX512BW case, mirroring the _reg_evex_special and vsub forms.
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed8B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb $dst,$dst,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_mem_evex_special(vecD dst, vecD src, memory mem) %{
  // Predicate corrected to supports_avx512nobw() (see vadd4B_mem_evex_special).
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed16B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb $dst,$dst,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem_evex_special(vecX dst, vecX src, memory mem) %{
  // Predicate corrected to supports_avx512nobw() (see vadd4B_mem_evex_special).
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb $dst,$dst,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem_evex_special(vecY dst, vecY src, memory mem) %{
  // Predicate corrected to supports_avx512nobw() (see vadd4B_mem_evex_special).
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector add
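// As with the byte forms above, each AVX shape below comes in three mutually
// exclusive flavors keyed off VM_Version: supports_avxonly()/supports_avx256only()
// (AVX/AVX2 without EVEX), supports_avx512bw() (EVEX with the byte/word
// extension), and supports_avx512nobw() (EVEX without it). Byte and word
// vpadd/vpsub need AVX512BW for their 512-bit forms, which is why only the
// avx512bw rules reach vecZ operands; the *_evex_special rules cover the
// EVEX-without-BW case (a reading of the pattern, not documented in the file).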
instruct vadd2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed2S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
  // Predicate corrected to supports_avx512nobw(), matching the _reg_evex_special
  // form and the corresponding vsub rules.
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed4S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
  // Predicate corrected to supports_avx512nobw() (see vadd2S_mem_evex_special).
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed8S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
  // Predicate corrected to supports_avx512nobw() (see vadd2S_mem_evex_special).
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
  // Predicate corrected to supports_avx512nobw() (see vadd2S_mem_evex_special).
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector add
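// Dword adds need no BW-style special casing: vpaddd is part of base AVX-512F,
// so a plain UseAVX-level predicate suffices up to the 512-bit shape. A
// hypothetical Java source loop (illustrative only) for these AddVI rules:
//
//   static void addInts(int[] a, int[] b) {
//     for (int i = 0; i < a.length; i++) {
//       a[i] += b[i];   // element-wise int add -> AddVI
//     }
//   }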
instruct vadd2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed2I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed4I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector add
instruct vadd2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL dst src));
  format %{ "paddq $dst,$src\t! add packed2L" %}
  ins_encode %{
    __ paddq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector add
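// Unlike the FP reductions earlier in this file, an element-wise FP add keeps
// every lane independent, so packed addps/vaddps is legal under Java's strict
// FP evaluation order. A hypothetical Java loop (illustrative only) for the
// AddVF rules below:
//
//   static void addFloats(float[] a, float[] b) {
//     for (int i = 0; i < a.length; i++) {
//       a[i] += b[i];   // element-wise float add -> AddVF
//     }
//   }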
instruct vadd2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed2F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed4F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector add
instruct vadd2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD dst src));
  format %{ "addpd $dst,$src\t! add packed2D" %}
  ins_encode %{
    __ addpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- SUB --------------------------------------

// Bytes vector sub
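// The SUB rules mirror the ADD rules one-for-one (same operand shapes and the
// same avxonly/avx512bw/avx512nobw predicate split); only the opcode changes.
// A hypothetical Java loop (illustrative only) for the SubVB rules below:
//
//   static void subBytes(byte[] a, byte[] b) {
//     for (int i = 0; i < a.length; i++) {
//       a[i] -= b[i];   // element-wise byte subtract -> SubVB
//     }
//   }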
instruct vsub4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed4B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed8B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed16B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector sub
instruct vsub2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed2S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst src2));
  effect(TEMP src1);
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed4S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst src2));
  effect(TEMP src1);
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed8S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst src2));
  effect(TEMP src1);
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! 
sub packed8S" %} 7272 ins_encode %{ 7273 int vector_len = 0; 7274 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7275 %} 7276 ins_pipe( pipe_slow ); 7277 %} 7278 7279 instruct vsub8S_mem_evex(vecX dst, vecX src, memory mem) %{ 7280 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 7281 match(Set dst (SubVS src (LoadVector mem))); 7282 format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %} 7283 ins_encode %{ 7284 int vector_len = 0; 7285 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7286 %} 7287 ins_pipe( pipe_slow ); 7288 %} 7289 7290 instruct vsub8S_mem_evex_special(vecX dst, vecX src, memory mem) %{ 7291 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 7292 match(Set dst (SubVS dst (LoadVector mem))); 7293 effect(TEMP src); 7294 format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %} 7295 ins_encode %{ 7296 int vector_len = 0; 7297 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7298 %} 7299 ins_pipe( pipe_slow ); 7300 %} 7301 7302 instruct vsub16S_reg_avx(vecY dst, vecY src1, vecY src2) %{ 7303 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 7304 match(Set dst (SubVS src1 src2)); 7305 format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} 7306 ins_encode %{ 7307 int vector_len = 1; 7308 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7309 %} 7310 ins_pipe( pipe_slow ); 7311 %} 7312 7313 instruct vsub16S_reg_evex(vecY dst, vecY src1, vecY src2) %{ 7314 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 7315 match(Set dst (SubVS src1 src2)); 7316 format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} 7317 ins_encode %{ 7318 int vector_len = 1; 7319 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7320 %} 7321 ins_pipe( pipe_slow ); 7322 %} 7323 7324 instruct vsub16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ 7325 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 7326 match(Set dst (SubVS dst src2)); 7327 effect(TEMP src1); 7328 format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} 7329 ins_encode %{ 7330 int vector_len = 1; 7331 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7332 %} 7333 ins_pipe( pipe_slow ); 7334 %} 7335 7336 instruct vsub16S_mem_avx(vecY dst, vecY src, memory mem) %{ 7337 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 7338 match(Set dst (SubVS src (LoadVector mem))); 7339 format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %} 7340 ins_encode %{ 7341 int vector_len = 1; 7342 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7343 %} 7344 ins_pipe( pipe_slow ); 7345 %} 7346 7347 instruct vsub16S_mem_evex(vecY dst, vecY src, memory mem) %{ 7348 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 7349 match(Set dst (SubVS src (LoadVector mem))); 7350 format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %} 7351 ins_encode %{ 7352 int vector_len = 1; 7353 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7354 %} 7355 ins_pipe( pipe_slow ); 7356 %} 7357 7358 instruct vsub16S_mem_evex_special(vecY dst, vecY src, memory mem) %{ 7359 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 7360 match(Set dst (SubVS dst (LoadVector mem))); 7361 effect(TEMP src); 7362 format %{ "vpsubw $dst,$src,$mem\t! 
sub packed16S" %} 7363 ins_encode %{ 7364 int vector_len = 1; 7365 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7366 %} 7367 ins_pipe( pipe_slow ); 7368 %} 7369 7370 instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7371 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 7372 match(Set dst (SubVS src1 src2)); 7373 format %{ "vpsubw $dst,$src1,$src2\t! sub packed32S" %} 7374 ins_encode %{ 7375 int vector_len = 2; 7376 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7377 %} 7378 ins_pipe( pipe_slow ); 7379 %} 7380 7381 instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{ 7382 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 7383 match(Set dst (SubVS src (LoadVector mem))); 7384 format %{ "vpsubw $dst,$src,$mem\t! sub packed32S" %} 7385 ins_encode %{ 7386 int vector_len = 2; 7387 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7388 %} 7389 ins_pipe( pipe_slow ); 7390 %} 7391 7392 // Integers vector sub 7393 instruct vsub2I(vecD dst, vecD src) %{ 7394 predicate(n->as_Vector()->length() == 2); 7395 match(Set dst (SubVI dst src)); 7396 format %{ "psubd $dst,$src\t! sub packed2I" %} 7397 ins_encode %{ 7398 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 7399 %} 7400 ins_pipe( pipe_slow ); 7401 %} 7402 7403 instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{ 7404 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7405 match(Set dst (SubVI src1 src2)); 7406 format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %} 7407 ins_encode %{ 7408 int vector_len = 0; 7409 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7410 %} 7411 ins_pipe( pipe_slow ); 7412 %} 7413 7414 instruct vsub2I_mem(vecD dst, vecD src, memory mem) %{ 7415 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7416 match(Set dst (SubVI src (LoadVector mem))); 7417 format %{ "vpsubd $dst,$src,$mem\t! sub packed2I" %} 7418 ins_encode %{ 7419 int vector_len = 0; 7420 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7421 %} 7422 ins_pipe( pipe_slow ); 7423 %} 7424 7425 instruct vsub4I(vecX dst, vecX src) %{ 7426 predicate(n->as_Vector()->length() == 4); 7427 match(Set dst (SubVI dst src)); 7428 format %{ "psubd $dst,$src\t! sub packed4I" %} 7429 ins_encode %{ 7430 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 7431 %} 7432 ins_pipe( pipe_slow ); 7433 %} 7434 7435 instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{ 7436 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7437 match(Set dst (SubVI src1 src2)); 7438 format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %} 7439 ins_encode %{ 7440 int vector_len = 0; 7441 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7442 %} 7443 ins_pipe( pipe_slow ); 7444 %} 7445 7446 instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{ 7447 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7448 match(Set dst (SubVI src (LoadVector mem))); 7449 format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %} 7450 ins_encode %{ 7451 int vector_len = 0; 7452 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7453 %} 7454 ins_pipe( pipe_slow ); 7455 %} 7456 7457 instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{ 7458 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 7459 match(Set dst (SubVI src1 src2)); 7460 format %{ "vpsubd $dst,$src1,$src2\t! 
sub packed8I" %} 7461 ins_encode %{ 7462 int vector_len = 1; 7463 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7464 %} 7465 ins_pipe( pipe_slow ); 7466 %} 7467 7468 instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{ 7469 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 7470 match(Set dst (SubVI src (LoadVector mem))); 7471 format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %} 7472 ins_encode %{ 7473 int vector_len = 1; 7474 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7475 %} 7476 ins_pipe( pipe_slow ); 7477 %} 7478 7479 instruct vsub16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7480 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7481 match(Set dst (SubVI src1 src2)); 7482 format %{ "vpsubd $dst,$src1,$src2\t! sub packed16I" %} 7483 ins_encode %{ 7484 int vector_len = 2; 7485 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7486 %} 7487 ins_pipe( pipe_slow ); 7488 %} 7489 7490 instruct vsub16I_mem(vecZ dst, vecZ src, memory mem) %{ 7491 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7492 match(Set dst (SubVI src (LoadVector mem))); 7493 format %{ "vpsubd $dst,$src,$mem\t! sub packed16I" %} 7494 ins_encode %{ 7495 int vector_len = 2; 7496 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7497 %} 7498 ins_pipe( pipe_slow ); 7499 %} 7500 7501 // Longs vector sub 7502 instruct vsub2L(vecX dst, vecX src) %{ 7503 predicate(n->as_Vector()->length() == 2); 7504 match(Set dst (SubVL dst src)); 7505 format %{ "psubq $dst,$src\t! sub packed2L" %} 7506 ins_encode %{ 7507 __ psubq($dst$$XMMRegister, $src$$XMMRegister); 7508 %} 7509 ins_pipe( pipe_slow ); 7510 %} 7511 7512 instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{ 7513 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7514 match(Set dst (SubVL src1 src2)); 7515 format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %} 7516 ins_encode %{ 7517 int vector_len = 0; 7518 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7519 %} 7520 ins_pipe( pipe_slow ); 7521 %} 7522 7523 instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{ 7524 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7525 match(Set dst (SubVL src (LoadVector mem))); 7526 format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %} 7527 ins_encode %{ 7528 int vector_len = 0; 7529 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7530 %} 7531 ins_pipe( pipe_slow ); 7532 %} 7533 7534 instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{ 7535 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 7536 match(Set dst (SubVL src1 src2)); 7537 format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %} 7538 ins_encode %{ 7539 int vector_len = 1; 7540 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7541 %} 7542 ins_pipe( pipe_slow ); 7543 %} 7544 7545 instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{ 7546 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 7547 match(Set dst (SubVL src (LoadVector mem))); 7548 format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %} 7549 ins_encode %{ 7550 int vector_len = 1; 7551 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7552 %} 7553 ins_pipe( pipe_slow ); 7554 %} 7555 7556 instruct vsub8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7557 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 7558 match(Set dst (SubVL src1 src2)); 7559 format %{ "vpsubq $dst,$src1,$src2\t! 
sub packed8L" %} 7560 ins_encode %{ 7561 int vector_len = 2; 7562 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7563 %} 7564 ins_pipe( pipe_slow ); 7565 %} 7566 7567 instruct vsub8L_mem(vecZ dst, vecZ src, memory mem) %{ 7568 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 7569 match(Set dst (SubVL src (LoadVector mem))); 7570 format %{ "vpsubq $dst,$src,$mem\t! sub packed8L" %} 7571 ins_encode %{ 7572 int vector_len = 2; 7573 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7574 %} 7575 ins_pipe( pipe_slow ); 7576 %} 7577 7578 // Floats vector sub 7579 instruct vsub2F(vecD dst, vecD src) %{ 7580 predicate(n->as_Vector()->length() == 2); 7581 match(Set dst (SubVF dst src)); 7582 format %{ "subps $dst,$src\t! sub packed2F" %} 7583 ins_encode %{ 7584 __ subps($dst$$XMMRegister, $src$$XMMRegister); 7585 %} 7586 ins_pipe( pipe_slow ); 7587 %} 7588 7589 instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{ 7590 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7591 match(Set dst (SubVF src1 src2)); 7592 format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %} 7593 ins_encode %{ 7594 int vector_len = 0; 7595 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7596 %} 7597 ins_pipe( pipe_slow ); 7598 %} 7599 7600 instruct vsub2F_mem(vecD dst, vecD src, memory mem) %{ 7601 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7602 match(Set dst (SubVF src (LoadVector mem))); 7603 format %{ "vsubps $dst,$src,$mem\t! sub packed2F" %} 7604 ins_encode %{ 7605 int vector_len = 0; 7606 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7607 %} 7608 ins_pipe( pipe_slow ); 7609 %} 7610 7611 instruct vsub4F(vecX dst, vecX src) %{ 7612 predicate(n->as_Vector()->length() == 4); 7613 match(Set dst (SubVF dst src)); 7614 format %{ "subps $dst,$src\t! sub packed4F" %} 7615 ins_encode %{ 7616 __ subps($dst$$XMMRegister, $src$$XMMRegister); 7617 %} 7618 ins_pipe( pipe_slow ); 7619 %} 7620 7621 instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{ 7622 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7623 match(Set dst (SubVF src1 src2)); 7624 format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %} 7625 ins_encode %{ 7626 int vector_len = 0; 7627 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7628 %} 7629 ins_pipe( pipe_slow ); 7630 %} 7631 7632 instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{ 7633 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7634 match(Set dst (SubVF src (LoadVector mem))); 7635 format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %} 7636 ins_encode %{ 7637 int vector_len = 0; 7638 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7639 %} 7640 ins_pipe( pipe_slow ); 7641 %} 7642 7643 instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{ 7644 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7645 match(Set dst (SubVF src1 src2)); 7646 format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %} 7647 ins_encode %{ 7648 int vector_len = 1; 7649 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7650 %} 7651 ins_pipe( pipe_slow ); 7652 %} 7653 7654 instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{ 7655 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7656 match(Set dst (SubVF src (LoadVector mem))); 7657 format %{ "vsubps $dst,$src,$mem\t! 
sub packed8F" %} 7658 ins_encode %{ 7659 int vector_len = 1; 7660 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7661 %} 7662 ins_pipe( pipe_slow ); 7663 %} 7664 7665 instruct vsub16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7666 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7667 match(Set dst (SubVF src1 src2)); 7668 format %{ "vsubps $dst,$src1,$src2\t! sub packed16F" %} 7669 ins_encode %{ 7670 int vector_len = 2; 7671 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7672 %} 7673 ins_pipe( pipe_slow ); 7674 %} 7675 7676 instruct vsub16F_mem(vecZ dst, vecZ src, memory mem) %{ 7677 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7678 match(Set dst (SubVF src (LoadVector mem))); 7679 format %{ "vsubps $dst,$src,$mem\t! sub packed16F" %} 7680 ins_encode %{ 7681 int vector_len = 2; 7682 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7683 %} 7684 ins_pipe( pipe_slow ); 7685 %} 7686 7687 // Doubles vector sub 7688 instruct vsub2D(vecX dst, vecX src) %{ 7689 predicate(n->as_Vector()->length() == 2); 7690 match(Set dst (SubVD dst src)); 7691 format %{ "subpd $dst,$src\t! sub packed2D" %} 7692 ins_encode %{ 7693 __ subpd($dst$$XMMRegister, $src$$XMMRegister); 7694 %} 7695 ins_pipe( pipe_slow ); 7696 %} 7697 7698 instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{ 7699 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7700 match(Set dst (SubVD src1 src2)); 7701 format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %} 7702 ins_encode %{ 7703 int vector_len = 0; 7704 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7705 %} 7706 ins_pipe( pipe_slow ); 7707 %} 7708 7709 instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{ 7710 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7711 match(Set dst (SubVD src (LoadVector mem))); 7712 format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %} 7713 ins_encode %{ 7714 int vector_len = 0; 7715 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7716 %} 7717 ins_pipe( pipe_slow ); 7718 %} 7719 7720 instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{ 7721 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7722 match(Set dst (SubVD src1 src2)); 7723 format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %} 7724 ins_encode %{ 7725 int vector_len = 1; 7726 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7727 %} 7728 ins_pipe( pipe_slow ); 7729 %} 7730 7731 instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{ 7732 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7733 match(Set dst (SubVD src (LoadVector mem))); 7734 format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %} 7735 ins_encode %{ 7736 int vector_len = 1; 7737 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7738 %} 7739 ins_pipe( pipe_slow ); 7740 %} 7741 7742 instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7743 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 7744 match(Set dst (SubVD src1 src2)); 7745 format %{ "vsubpd $dst,$src1,$src2\t! sub packed8D" %} 7746 ins_encode %{ 7747 int vector_len = 2; 7748 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7749 %} 7750 ins_pipe( pipe_slow ); 7751 %} 7752 7753 instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{ 7754 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 7755 match(Set dst (SubVD src (LoadVector mem))); 7756 format %{ "vsubpd $dst,$src,$mem\t! 
sub packed8D" %} 7757 ins_encode %{ 7758 int vector_len = 2; 7759 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7760 %} 7761 ins_pipe( pipe_slow ); 7762 %} 7763 7764 // --------------------------------- MUL -------------------------------------- 7765 7766 // Shorts/Chars vector mul 7767 instruct vmul2S(vecS dst, vecS src) %{ 7768 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 7769 match(Set dst (MulVS dst src)); 7770 format %{ "pmullw $dst,$src\t! mul packed2S" %} 7771 ins_encode %{ 7772 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 7773 %} 7774 ins_pipe( pipe_slow ); 7775 %} 7776 7777 instruct vmul2S_reg_avx(vecS dst, vecS src1, vecS src2) %{ 7778 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 7779 match(Set dst (MulVS src1 src2)); 7780 format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} 7781 ins_encode %{ 7782 int vector_len = 0; 7783 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7784 %} 7785 ins_pipe( pipe_slow ); 7786 %} 7787 7788 instruct vmul2S_reg_evex(vecS dst, vecS src1, vecS src2) %{ 7789 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 7790 match(Set dst (MulVS src1 src2)); 7791 format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} 7792 ins_encode %{ 7793 int vector_len = 0; 7794 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7795 %} 7796 ins_pipe( pipe_slow ); 7797 %} 7798 7799 instruct vmul2S_evex_special(vecS dst, vecS src1, vecS src2) %{ 7800 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 7801 match(Set dst (MulVS dst src2)); 7802 effect(TEMP src1); 7803 format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} 7804 ins_encode %{ 7805 int vector_len = 0; 7806 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7807 %} 7808 ins_pipe( pipe_slow ); 7809 %} 7810 7811 instruct vmul2S_mem_avx(vecS dst, vecS src, memory mem) %{ 7812 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 7813 match(Set dst (MulVS src (LoadVector mem))); 7814 format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} 7815 ins_encode %{ 7816 int vector_len = 0; 7817 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7818 %} 7819 ins_pipe( pipe_slow ); 7820 %} 7821 7822 instruct vmul2S_mem_evex(vecS dst, vecS src, memory mem) %{ 7823 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 7824 match(Set dst (MulVS src (LoadVector mem))); 7825 format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} 7826 ins_encode %{ 7827 int vector_len = 0; 7828 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7829 %} 7830 ins_pipe( pipe_slow ); 7831 %} 7832 7833 instruct vmul2S_mem_evex_special(vecS dst, vecS src, memory mem) %{ 7834 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 7835 match(Set dst (MulVS dst (LoadVector mem))); 7836 effect(TEMP src); 7837 format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} 7838 ins_encode %{ 7839 int vector_len = 0; 7840 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7841 %} 7842 ins_pipe( pipe_slow ); 7843 %} 7844 7845 instruct vmul4S(vecD dst, vecD src) %{ 7846 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 7847 match(Set dst (MulVS dst src)); 7848 format %{ "pmullw $dst,$src\t! 
mul packed4S" %} 7849 ins_encode %{ 7850 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 7851 %} 7852 ins_pipe( pipe_slow ); 7853 %} 7854 7855 instruct vmul4S_reg_avx(vecD dst, vecD src1, vecD src2) %{ 7856 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 7857 match(Set dst (MulVS src1 src2)); 7858 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 7859 ins_encode %{ 7860 int vector_len = 0; 7861 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7862 %} 7863 ins_pipe( pipe_slow ); 7864 %} 7865 7866 instruct vmul4S_reg_evex(vecD dst, vecD src1, vecD src2) %{ 7867 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 7868 match(Set dst (MulVS src1 src2)); 7869 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 7870 ins_encode %{ 7871 int vector_len = 0; 7872 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7873 %} 7874 ins_pipe( pipe_slow ); 7875 %} 7876 7877 instruct vmul4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ 7878 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 7879 match(Set dst (MulVS dst src2)); 7880 effect(TEMP src1); 7881 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 7882 ins_encode %{ 7883 int vector_len = 0; 7884 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7885 %} 7886 ins_pipe( pipe_slow ); 7887 %} 7888 7889 instruct vmul4S_mem_avx(vecD dst, vecD src, memory mem) %{ 7890 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 7891 match(Set dst (MulVS src (LoadVector mem))); 7892 format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} 7893 ins_encode %{ 7894 int vector_len = 0; 7895 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7896 %} 7897 ins_pipe( pipe_slow ); 7898 %} 7899 7900 instruct vmul4S_mem_evex(vecD dst, vecD src, memory mem) %{ 7901 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 7902 match(Set dst (MulVS src (LoadVector mem))); 7903 format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} 7904 ins_encode %{ 7905 int vector_len = 0; 7906 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7907 %} 7908 ins_pipe( pipe_slow ); 7909 %} 7910 7911 instruct vmul4S_mem_evex_special(vecD dst, vecD src, memory mem) %{ 7912 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 7913 match(Set dst (MulVS dst (LoadVector mem))); 7914 effect(TEMP src); 7915 format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} 7916 ins_encode %{ 7917 int vector_len = 0; 7918 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7919 %} 7920 ins_pipe( pipe_slow ); 7921 %} 7922 7923 instruct vmul8S(vecX dst, vecX src) %{ 7924 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 7925 match(Set dst (MulVS dst src)); 7926 format %{ "pmullw $dst,$src\t! mul packed8S" %} 7927 ins_encode %{ 7928 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 7929 %} 7930 ins_pipe( pipe_slow ); 7931 %} 7932 7933 instruct vmul8S_reg_avx(vecX dst, vecX src1, vecX src2) %{ 7934 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 7935 match(Set dst (MulVS src1 src2)); 7936 format %{ "vpmullw $dst,$src1,$src2\t! 
mul packed8S" %} 7937 ins_encode %{ 7938 int vector_len = 0; 7939 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7940 %} 7941 ins_pipe( pipe_slow ); 7942 %} 7943 7944 instruct vmul8S_reg_evex(vecX dst, vecX src1, vecX src2) %{ 7945 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 7946 match(Set dst (MulVS src1 src2)); 7947 format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} 7948 ins_encode %{ 7949 int vector_len = 0; 7950 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7951 %} 7952 ins_pipe( pipe_slow ); 7953 %} 7954 7955 instruct vmul8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ 7956 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 7957 match(Set dst (MulVS dst src2)); 7958 effect(TEMP src1); 7959 format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} 7960 ins_encode %{ 7961 int vector_len = 0; 7962 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7963 %} 7964 ins_pipe( pipe_slow ); 7965 %} 7966 7967 instruct vmul8S_mem_avx(vecX dst, vecX src, memory mem) %{ 7968 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 7969 match(Set dst (MulVS src (LoadVector mem))); 7970 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} 7971 ins_encode %{ 7972 int vector_len = 0; 7973 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7974 %} 7975 ins_pipe( pipe_slow ); 7976 %} 7977 7978 instruct vmul8S_mem_evex(vecX dst, vecX src, memory mem) %{ 7979 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 7980 match(Set dst (MulVS src (LoadVector mem))); 7981 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} 7982 ins_encode %{ 7983 int vector_len = 0; 7984 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7985 %} 7986 ins_pipe( pipe_slow ); 7987 %} 7988 7989 instruct vmul8S_mem_evex_special(vecX dst, vecX src, memory mem) %{ 7990 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 7991 match(Set dst (MulVS dst (LoadVector mem))); 7992 effect(TEMP src); 7993 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} 7994 ins_encode %{ 7995 int vector_len = 0; 7996 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7997 %} 7998 ins_pipe( pipe_slow ); 7999 %} 8000 8001 instruct vmul16S_reg_avx(vecY dst, vecY src1, vecY src2) %{ 8002 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 8003 match(Set dst (MulVS src1 src2)); 8004 format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} 8005 ins_encode %{ 8006 int vector_len = 1; 8007 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8008 %} 8009 ins_pipe( pipe_slow ); 8010 %} 8011 8012 instruct vmul16S_reg_evex(vecY dst, vecY src1, vecY src2) %{ 8013 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 8014 match(Set dst (MulVS src1 src2)); 8015 format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} 8016 ins_encode %{ 8017 int vector_len = 1; 8018 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8019 %} 8020 ins_pipe( pipe_slow ); 8021 %} 8022 8023 instruct vmul16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ 8024 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 8025 match(Set dst (MulVS dst src2)); 8026 effect(TEMP src1); 8027 format %{ "vpmullw $dst,$src1,$src2\t! 
mul packed16S" %} 8028 ins_encode %{ 8029 int vector_len = 1; 8030 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8031 %} 8032 ins_pipe( pipe_slow ); 8033 %} 8034 8035 instruct vmul16S_mem_avx(vecY dst, vecY src, memory mem) %{ 8036 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 8037 match(Set dst (MulVS src (LoadVector mem))); 8038 format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} 8039 ins_encode %{ 8040 int vector_len = 1; 8041 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8042 %} 8043 ins_pipe( pipe_slow ); 8044 %} 8045 8046 instruct vmul16S_mem_evex(vecY dst, vecY src, memory mem) %{ 8047 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 8048 match(Set dst (MulVS src (LoadVector mem))); 8049 format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} 8050 ins_encode %{ 8051 int vector_len = 1; 8052 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8053 %} 8054 ins_pipe( pipe_slow ); 8055 %} 8056 8057 instruct vmul16S_mem_evex_special(vecY dst, vecY src, memory mem) %{ 8058 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 8059 match(Set dst (MulVS dst (LoadVector mem))); 8060 effect(TEMP src); 8061 format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} 8062 ins_encode %{ 8063 int vector_len = 1; 8064 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8065 %} 8066 ins_pipe( pipe_slow ); 8067 %} 8068 8069 instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ 8070 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 8071 match(Set dst (MulVS src1 src2)); 8072 format %{ "vpmullw $dst,$src1,$src2\t! mul packed32S" %} 8073 ins_encode %{ 8074 int vector_len = 2; 8075 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8076 %} 8077 ins_pipe( pipe_slow ); 8078 %} 8079 8080 instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{ 8081 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 8082 match(Set dst (MulVS src (LoadVector mem))); 8083 format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %} 8084 ins_encode %{ 8085 int vector_len = 2; 8086 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8087 %} 8088 ins_pipe( pipe_slow ); 8089 %} 8090 8091 // Integers vector mul (sse4_1) 8092 instruct vmul2I(vecD dst, vecD src) %{ 8093 predicate(UseSSE > 3 && n->as_Vector()->length() == 2); 8094 match(Set dst (MulVI dst src)); 8095 format %{ "pmulld $dst,$src\t! mul packed2I" %} 8096 ins_encode %{ 8097 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 8098 %} 8099 ins_pipe( pipe_slow ); 8100 %} 8101 8102 instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{ 8103 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8104 match(Set dst (MulVI src1 src2)); 8105 format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %} 8106 ins_encode %{ 8107 int vector_len = 0; 8108 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8109 %} 8110 ins_pipe( pipe_slow ); 8111 %} 8112 8113 instruct vmul2I_mem(vecD dst, vecD src, memory mem) %{ 8114 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8115 match(Set dst (MulVI src (LoadVector mem))); 8116 format %{ "vpmulld $dst,$src,$mem\t! 
mul packed2I" %} 8117 ins_encode %{ 8118 int vector_len = 0; 8119 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8120 %} 8121 ins_pipe( pipe_slow ); 8122 %} 8123 8124 instruct vmul4I(vecX dst, vecX src) %{ 8125 predicate(UseSSE > 3 && n->as_Vector()->length() == 4); 8126 match(Set dst (MulVI dst src)); 8127 format %{ "pmulld $dst,$src\t! mul packed4I" %} 8128 ins_encode %{ 8129 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 8130 %} 8131 ins_pipe( pipe_slow ); 8132 %} 8133 8134 instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{ 8135 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8136 match(Set dst (MulVI src1 src2)); 8137 format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %} 8138 ins_encode %{ 8139 int vector_len = 0; 8140 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8141 %} 8142 ins_pipe( pipe_slow ); 8143 %} 8144 8145 instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{ 8146 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8147 match(Set dst (MulVI src (LoadVector mem))); 8148 format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %} 8149 ins_encode %{ 8150 int vector_len = 0; 8151 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8152 %} 8153 ins_pipe( pipe_slow ); 8154 %} 8155 8156 instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{ 8157 predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq()); 8158 match(Set dst (MulVL src1 src2)); 8159 format %{ "vpmullq $dst,$src1,$src2\t! mul packed2L" %} 8160 ins_encode %{ 8161 int vector_len = 0; 8162 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8163 %} 8164 ins_pipe( pipe_slow ); 8165 %} 8166 8167 instruct vmul2L_mem(vecX dst, vecX src, memory mem) %{ 8168 predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq()); 8169 match(Set dst (MulVL src (LoadVector mem))); 8170 format %{ "vpmullq $dst,$src,$mem\t! mul packed2L" %} 8171 ins_encode %{ 8172 int vector_len = 0; 8173 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8174 %} 8175 ins_pipe( pipe_slow ); 8176 %} 8177 8178 instruct vmul4L_reg(vecY dst, vecY src1, vecY src2) %{ 8179 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq()); 8180 match(Set dst (MulVL src1 src2)); 8181 format %{ "vpmullq $dst,$src1,$src2\t! mul packed4L" %} 8182 ins_encode %{ 8183 int vector_len = 1; 8184 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8185 %} 8186 ins_pipe( pipe_slow ); 8187 %} 8188 8189 instruct vmul4L_mem(vecY dst, vecY src, memory mem) %{ 8190 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq()); 8191 match(Set dst (MulVL src (LoadVector mem))); 8192 format %{ "vpmullq $dst,$src,$mem\t! mul packed4L" %} 8193 ins_encode %{ 8194 int vector_len = 1; 8195 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8196 %} 8197 ins_pipe( pipe_slow ); 8198 %} 8199 8200 instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ 8201 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq()); 8202 match(Set dst (MulVL src1 src2)); 8203 format %{ "vpmullq $dst,$src1,$src2\t! 
mul packed8L" %} 8204 ins_encode %{ 8205 int vector_len = 2; 8206 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8207 %} 8208 ins_pipe( pipe_slow ); 8209 %} 8210 8211 instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{ 8212 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq()); 8213 match(Set dst (MulVL src (LoadVector mem))); 8214 format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %} 8215 ins_encode %{ 8216 int vector_len = 2; 8217 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8218 %} 8219 ins_pipe( pipe_slow ); 8220 %} 8221 8222 instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{ 8223 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 8224 match(Set dst (MulVI src1 src2)); 8225 format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %} 8226 ins_encode %{ 8227 int vector_len = 1; 8228 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8229 %} 8230 ins_pipe( pipe_slow ); 8231 %} 8232 8233 instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{ 8234 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 8235 match(Set dst (MulVI src (LoadVector mem))); 8236 format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %} 8237 ins_encode %{ 8238 int vector_len = 1; 8239 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8240 %} 8241 ins_pipe( pipe_slow ); 8242 %} 8243 8244 instruct vmul16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ 8245 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8246 match(Set dst (MulVI src1 src2)); 8247 format %{ "vpmulld $dst,$src1,$src2\t! mul packed16I" %} 8248 ins_encode %{ 8249 int vector_len = 2; 8250 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8251 %} 8252 ins_pipe( pipe_slow ); 8253 %} 8254 8255 instruct vmul16I_mem(vecZ dst, vecZ src, memory mem) %{ 8256 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8257 match(Set dst (MulVI src (LoadVector mem))); 8258 format %{ "vpmulld $dst,$src,$mem\t! mul packed16I" %} 8259 ins_encode %{ 8260 int vector_len = 2; 8261 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8262 %} 8263 ins_pipe( pipe_slow ); 8264 %} 8265 8266 // Floats vector mul 8267 instruct vmul2F(vecD dst, vecD src) %{ 8268 predicate(n->as_Vector()->length() == 2); 8269 match(Set dst (MulVF dst src)); 8270 format %{ "mulps $dst,$src\t! mul packed2F" %} 8271 ins_encode %{ 8272 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 8273 %} 8274 ins_pipe( pipe_slow ); 8275 %} 8276 8277 instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{ 8278 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8279 match(Set dst (MulVF src1 src2)); 8280 format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %} 8281 ins_encode %{ 8282 int vector_len = 0; 8283 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8284 %} 8285 ins_pipe( pipe_slow ); 8286 %} 8287 8288 instruct vmul2F_mem(vecD dst, vecD src, memory mem) %{ 8289 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8290 match(Set dst (MulVF src (LoadVector mem))); 8291 format %{ "vmulps $dst,$src,$mem\t! mul packed2F" %} 8292 ins_encode %{ 8293 int vector_len = 0; 8294 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8295 %} 8296 ins_pipe( pipe_slow ); 8297 %} 8298 8299 instruct vmul4F(vecX dst, vecX src) %{ 8300 predicate(n->as_Vector()->length() == 4); 8301 match(Set dst (MulVF dst src)); 8302 format %{ "mulps $dst,$src\t! 
mul packed4F" %} 8303 ins_encode %{ 8304 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 8305 %} 8306 ins_pipe( pipe_slow ); 8307 %} 8308 8309 instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{ 8310 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8311 match(Set dst (MulVF src1 src2)); 8312 format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %} 8313 ins_encode %{ 8314 int vector_len = 0; 8315 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8316 %} 8317 ins_pipe( pipe_slow ); 8318 %} 8319 8320 instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{ 8321 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8322 match(Set dst (MulVF src (LoadVector mem))); 8323 format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %} 8324 ins_encode %{ 8325 int vector_len = 0; 8326 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8327 %} 8328 ins_pipe( pipe_slow ); 8329 %} 8330 8331 instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{ 8332 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8333 match(Set dst (MulVF src1 src2)); 8334 format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %} 8335 ins_encode %{ 8336 int vector_len = 1; 8337 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8338 %} 8339 ins_pipe( pipe_slow ); 8340 %} 8341 8342 instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{ 8343 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8344 match(Set dst (MulVF src (LoadVector mem))); 8345 format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %} 8346 ins_encode %{ 8347 int vector_len = 1; 8348 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8349 %} 8350 ins_pipe( pipe_slow ); 8351 %} 8352 8353 instruct vmul16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ 8354 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8355 match(Set dst (MulVF src1 src2)); 8356 format %{ "vmulps $dst,$src1,$src2\t! mul packed16F" %} 8357 ins_encode %{ 8358 int vector_len = 2; 8359 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8360 %} 8361 ins_pipe( pipe_slow ); 8362 %} 8363 8364 instruct vmul16F_mem(vecZ dst, vecZ src, memory mem) %{ 8365 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8366 match(Set dst (MulVF src (LoadVector mem))); 8367 format %{ "vmulps $dst,$src,$mem\t! mul packed16F" %} 8368 ins_encode %{ 8369 int vector_len = 2; 8370 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8371 %} 8372 ins_pipe( pipe_slow ); 8373 %} 8374 8375 // Doubles vector mul 8376 instruct vmul2D(vecX dst, vecX src) %{ 8377 predicate(n->as_Vector()->length() == 2); 8378 match(Set dst (MulVD dst src)); 8379 format %{ "mulpd $dst,$src\t! mul packed2D" %} 8380 ins_encode %{ 8381 __ mulpd($dst$$XMMRegister, $src$$XMMRegister); 8382 %} 8383 ins_pipe( pipe_slow ); 8384 %} 8385 8386 instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{ 8387 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8388 match(Set dst (MulVD src1 src2)); 8389 format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %} 8390 ins_encode %{ 8391 int vector_len = 0; 8392 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8393 %} 8394 ins_pipe( pipe_slow ); 8395 %} 8396 8397 instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{ 8398 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8399 match(Set dst (MulVD src (LoadVector mem))); 8400 format %{ "vmulpd $dst,$src,$mem\t! 
mul packed2D" %} 8401 ins_encode %{ 8402 int vector_len = 0; 8403 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8404 %} 8405 ins_pipe( pipe_slow ); 8406 %} 8407 8408 instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{ 8409 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8410 match(Set dst (MulVD src1 src2)); 8411 format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %} 8412 ins_encode %{ 8413 int vector_len = 1; 8414 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8415 %} 8416 ins_pipe( pipe_slow ); 8417 %} 8418 8419 instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{ 8420 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8421 match(Set dst (MulVD src (LoadVector mem))); 8422 format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %} 8423 ins_encode %{ 8424 int vector_len = 1; 8425 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8426 %} 8427 ins_pipe( pipe_slow ); 8428 %} 8429 8430 instruct vmul8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ 8431 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8432 match(Set dst (MulVD src1 src2)); 8433 format %{ "vmulpd $dst k0,$src1,$src2\t! mul packed8D" %} 8434 ins_encode %{ 8435 int vector_len = 2; 8436 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8437 %} 8438 ins_pipe( pipe_slow ); 8439 %} 8440 8441 instruct vmul8D_mem(vecZ dst, vecZ src, memory mem) %{ 8442 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8443 match(Set dst (MulVD src (LoadVector mem))); 8444 format %{ "vmulpd $dst k0,$src,$mem\t! mul packed8D" %} 8445 ins_encode %{ 8446 int vector_len = 2; 8447 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8448 %} 8449 ins_pipe( pipe_slow ); 8450 %} 8451 8452 instruct vcmov4D_reg(vecY dst, vecY src1, vecY src2, immI8 cop, cmpOp_vcmppd copnd) %{ 8453 predicate(UseAVX > 0 && UseAVX < 3 && n->as_Vector()->length() == 4); 8454 match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2))); 8455 effect(TEMP dst, USE src1, USE src2); 8456 format %{ "cmppd.$copnd $dst, $src1, $src2 ! vcmovevd, cond=$cop\n\t" 8457 "vpblendd $dst,$src1,$src2,$dst ! vcmovevd\n\t" 8458 %} 8459 ins_encode %{ 8460 int vector_len = 1; 8461 int cond = (Assembler::Condition)($copnd$$cmpcode); 8462 __ cmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len); 8463 __ vpblendd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len); 8464 %} 8465 ins_pipe( pipe_slow ); 8466 %} 8467 8468 // --------------------------------- DIV -------------------------------------- 8469 8470 // Floats vector div 8471 instruct vdiv2F(vecD dst, vecD src) %{ 8472 predicate(n->as_Vector()->length() == 2); 8473 match(Set dst (DivVF dst src)); 8474 format %{ "divps $dst,$src\t! div packed2F" %} 8475 ins_encode %{ 8476 __ divps($dst$$XMMRegister, $src$$XMMRegister); 8477 %} 8478 ins_pipe( pipe_slow ); 8479 %} 8480 8481 instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{ 8482 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8483 match(Set dst (DivVF src1 src2)); 8484 format %{ "vdivps $dst,$src1,$src2\t! 
div packed2F" %} 8485 ins_encode %{ 8486 int vector_len = 0; 8487 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8488 %} 8489 ins_pipe( pipe_slow ); 8490 %} 8491 8492 instruct vdiv2F_mem(vecD dst, vecD src, memory mem) %{ 8493 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8494 match(Set dst (DivVF src (LoadVector mem))); 8495 format %{ "vdivps $dst,$src,$mem\t! div packed2F" %} 8496 ins_encode %{ 8497 int vector_len = 0; 8498 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8499 %} 8500 ins_pipe( pipe_slow ); 8501 %} 8502 8503 instruct vdiv4F(vecX dst, vecX src) %{ 8504 predicate(n->as_Vector()->length() == 4); 8505 match(Set dst (DivVF dst src)); 8506 format %{ "divps $dst,$src\t! div packed4F" %} 8507 ins_encode %{ 8508 __ divps($dst$$XMMRegister, $src$$XMMRegister); 8509 %} 8510 ins_pipe( pipe_slow ); 8511 %} 8512 8513 instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{ 8514 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8515 match(Set dst (DivVF src1 src2)); 8516 format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %} 8517 ins_encode %{ 8518 int vector_len = 0; 8519 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8520 %} 8521 ins_pipe( pipe_slow ); 8522 %} 8523 8524 instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{ 8525 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8526 match(Set dst (DivVF src (LoadVector mem))); 8527 format %{ "vdivps $dst,$src,$mem\t! div packed4F" %} 8528 ins_encode %{ 8529 int vector_len = 0; 8530 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8531 %} 8532 ins_pipe( pipe_slow ); 8533 %} 8534 8535 instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{ 8536 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8537 match(Set dst (DivVF src1 src2)); 8538 format %{ "vdivps $dst,$src1,$src2\t! div packed8F" %} 8539 ins_encode %{ 8540 int vector_len = 1; 8541 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8542 %} 8543 ins_pipe( pipe_slow ); 8544 %} 8545 8546 instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{ 8547 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8548 match(Set dst (DivVF src (LoadVector mem))); 8549 format %{ "vdivps $dst,$src,$mem\t! div packed8F" %} 8550 ins_encode %{ 8551 int vector_len = 1; 8552 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8553 %} 8554 ins_pipe( pipe_slow ); 8555 %} 8556 8557 instruct vdiv16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ 8558 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 8559 match(Set dst (DivVF src1 src2)); 8560 format %{ "vdivps $dst,$src1,$src2\t! div packed16F" %} 8561 ins_encode %{ 8562 int vector_len = 2; 8563 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8564 %} 8565 ins_pipe( pipe_slow ); 8566 %} 8567 8568 instruct vdiv16F_mem(vecZ dst, vecZ src, memory mem) %{ 8569 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 8570 match(Set dst (DivVF src (LoadVector mem))); 8571 format %{ "vdivps $dst,$src,$mem\t! div packed16F" %} 8572 ins_encode %{ 8573 int vector_len = 2; 8574 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8575 %} 8576 ins_pipe( pipe_slow ); 8577 %} 8578 8579 // Doubles vector div 8580 instruct vdiv2D(vecX dst, vecX src) %{ 8581 predicate(n->as_Vector()->length() == 2); 8582 match(Set dst (DivVD dst src)); 8583 format %{ "divpd $dst,$src\t! 
div packed2D" %} 8584 ins_encode %{ 8585 __ divpd($dst$$XMMRegister, $src$$XMMRegister); 8586 %} 8587 ins_pipe( pipe_slow ); 8588 %} 8589 8590 instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{ 8591 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8592 match(Set dst (DivVD src1 src2)); 8593 format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %} 8594 ins_encode %{ 8595 int vector_len = 0; 8596 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8597 %} 8598 ins_pipe( pipe_slow ); 8599 %} 8600 8601 instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{ 8602 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8603 match(Set dst (DivVD src (LoadVector mem))); 8604 format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %} 8605 ins_encode %{ 8606 int vector_len = 0; 8607 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8608 %} 8609 ins_pipe( pipe_slow ); 8610 %} 8611 8612 instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{ 8613 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8614 match(Set dst (DivVD src1 src2)); 8615 format %{ "vdivpd $dst,$src1,$src2\t! div packed4D" %} 8616 ins_encode %{ 8617 int vector_len = 1; 8618 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8619 %} 8620 ins_pipe( pipe_slow ); 8621 %} 8622 8623 instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{ 8624 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8625 match(Set dst (DivVD src (LoadVector mem))); 8626 format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %} 8627 ins_encode %{ 8628 int vector_len = 1; 8629 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8630 %} 8631 ins_pipe( pipe_slow ); 8632 %} 8633 8634 instruct vdiv8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ 8635 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8636 match(Set dst (DivVD src1 src2)); 8637 format %{ "vdivpd $dst,$src1,$src2\t! div packed8D" %} 8638 ins_encode %{ 8639 int vector_len = 2; 8640 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8641 %} 8642 ins_pipe( pipe_slow ); 8643 %} 8644 8645 instruct vdiv8D_mem(vecZ dst, vecZ src, memory mem) %{ 8646 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8647 match(Set dst (DivVD src (LoadVector mem))); 8648 format %{ "vdivpd $dst,$src,$mem\t! div packed8D" %} 8649 ins_encode %{ 8650 int vector_len = 2; 8651 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8652 %} 8653 ins_pipe( pipe_slow ); 8654 %} 8655 8656 // ------------------------------ Shift --------------------------------------- 8657 8658 // Left and right shift count vectors are the same on x86 8659 // (only lowest bits of xmm reg are used for count). 8660 instruct vshiftcnt(vecS dst, rRegI cnt) %{ 8661 match(Set dst (LShiftCntV cnt)); 8662 match(Set dst (RShiftCntV cnt)); 8663 format %{ "movd $dst,$cnt\t! load shift count" %} 8664 ins_encode %{ 8665 __ movdl($dst$$XMMRegister, $cnt$$Register); 8666 %} 8667 ins_pipe( pipe_slow ); 8668 %} 8669 8670 // --------------------------------- Sqrt -------------------------------------- 8671 8672 // Floating point vector sqrt - double precision only 8673 instruct vsqrt2D_reg(vecX dst, vecX src) %{ 8674 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8675 match(Set dst (SqrtVD src)); 8676 format %{ "vsqrtpd $dst,$src\t! 

// --------------------------------- Sqrt --------------------------------------

// Floating point vector sqrt - double precision only
instruct vsqrt2D_reg(vecX dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt2D_mem(vecX dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4D_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4D_mem(vecY dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8D_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8D_mem(vecZ dst, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ LeftShift -----------------------------------

// Shorts/Chars vector left shift
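// Naming convention for the short/char shift rules below:
//   *_avx          - VEX encodings, guarded by VM_Version::supports_avxonly()
//                    (128-bit forms) or supports_avx256only() (256-bit forms).
//   *_evex         - EVEX encodings, guarded by VM_Version::supports_avx512bw().
//   *_evex_special - used when AVX-512 is present without the BW extension
//                    (supports_avx512nobw()); these match the two-operand
//                    (dst = dst shift) tree and declare effect(TEMP src),
//                    presumably so the three-operand encoding can still be used.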
instruct vsll2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_avx(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_evex(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_avx(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_evex(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_avx(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_evex(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_avx(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_evex(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector left shift
instruct vsll2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector left shift
instruct vsll2L(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8L_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ----------------------- LogicalRightShift -----------------------------------

// Shorts vector logical right shift produces an incorrect Java result
// for negative data because Java code converts a short value into an int with
// sign extension before a shift. But char vectors are fine since chars are
// unsigned values.
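// A worked example of the short case (hex values for illustration):
//   short s = (short)0xFFFF;   // -1
//   Java:  (short)(s >>> 2) sign-extends s to 0xFFFFFFFF, shifts to
//          0x3FFFFFFF, then truncates back to 0xFFFF (still -1).
//   psrlw: shifts within 16 bits, 0xFFFF >>> 2 -> 0x3FFF (wrong for short).
// For char the two agree:
//   char c = (char)0xFFFF;     // 65535
//   Java:  (char)(c >>> 2) zero-extends to 0x0000FFFF, shifts to 0x3FFF.
//   psrlw: 0xFFFF >>> 2 -> 0x3FFF (matches).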

instruct vsrl2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_avx(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_evex(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_avx(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_evex(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_avx(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_evex(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_avx(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_evex(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector logical right shift
instruct vsrl2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector logical right shift
instruct vsrl2L(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8L_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------- ArithmeticRightShift -----------------------------------

// Shorts/Chars vector arithmetic right shift
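// Unlike the logical case above, arithmetic right shift is safe for shorts
// because sign extension commutes with '>>'. For example (hex values):
//   short s = (short)0x8000;   // -32768
//   (short)(s >> 2) == (short)0xE000, the same as a 16-bit psraw of 0x8000 by 2.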
instruct vsra2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_avx(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_evex(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_avx(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_evex(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_avx(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_evex(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_avx(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));

instruct vsra16S_reg_avx(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_evex(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
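
// For reference only: the lanewise semantics of the psraw/vpsraw rules above
// can be modeled in plain C++. This is an illustrative sketch, not VM code
// (ref_psraw is a hypothetical name). Note that x86 clamps an arithmetic
// shift count greater than 15, so an oversized count fills each word with
// copies of its sign bit.
//
//   #include <stdint.h>
//
//   // Reference model of psraw over one group of 8 x 16-bit lanes.
//   static void ref_psraw(int16_t dst[8], const int16_t src[8], unsigned count) {
//     unsigned c = count > 15 ? 15 : count;  // counts > 15 saturate to "all sign bits"
//     for (int i = 0; i < 8; i++) {
//       dst[i] = (int16_t)(src[i] >> c);     // >> replicates the sign bit on
//     }                                      // mainstream compilers (guaranteed in C++20)
//   }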

// Integers vector arithmetic right shift
instruct vsra2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad   $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad   $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad   $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad   $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// There are no longs vector arithmetic right shift instructions: below
// AVX-512 the x86 ISA provides no packed 64-bit arithmetic right shift
// (VPSRAQ is EVEX-encoded only), so RShiftVL is not matched here and
// long shifts remain scalar.


// --------------------------------- AND --------------------------------------

instruct vand4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV dst src));
  format %{ "pand    $dst,$src\t! and vectors (4 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src1 src2));
  format %{ "vpand   $dst,$src1,$src2\t! and vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand   $dst,$src,$mem\t! and vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV dst src));
  format %{ "pand    $dst,$src\t! and vectors (8 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  format %{ "vpand   $dst,$src1,$src2\t! and vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand   $dst,$src,$mem\t! and vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV dst src));
  format %{ "pand    $dst,$src\t! and vectors (16 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  format %{ "vpand   $dst,$src1,$src2\t! and vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand   $dst,$src,$mem\t! and vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src1 src2));
  format %{ "vpand   $dst,$src1,$src2\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand   $dst,$src,$mem\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src1 src2));
  format %{ "vpand   $dst,$src1,$src2\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand   $dst,$src,$mem\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
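
// The vector_len constant passed to the VEX/EVEX encodings throughout this
// file selects the operand width: 0 encodes 128-bit (XMM), 1 encodes 256-bit
// (YMM) and 2 encodes 512-bit (ZMM) operation. An illustrative C++ sketch of
// the mapping (vector_len_to_bits is a hypothetical helper, not a VM function):
//
//   // Map an AD-file vector_len value to the encoded operand width in bits.
//   static int vector_len_to_bits(int vector_len) {
//     // 128 << 0 = 128, 128 << 1 = 256, 128 << 2 = 512
//     return 128 << vector_len;
//   }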

// --------------------------------- OR ---------------------------------------

instruct vor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV dst src));
  format %{ "por     $dst,$src\t! or vectors (4 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src1 src2));
  format %{ "vpor    $dst,$src1,$src2\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor    $dst,$src,$mem\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV dst src));
  format %{ "por     $dst,$src\t! or vectors (8 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  format %{ "vpor    $dst,$src1,$src2\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor    $dst,$src,$mem\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV dst src));
  format %{ "por     $dst,$src\t! or vectors (16 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  format %{ "vpor    $dst,$src1,$src2\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor    $dst,$src,$mem\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src1 src2));
  format %{ "vpor    $dst,$src1,$src2\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor    $dst,$src,$mem\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src1 src2));
  format %{ "vpor    $dst,$src1,$src2\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor    $dst,$src,$mem\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- XOR --------------------------------------

instruct vxor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV dst src));
  format %{ "pxor    $dst,$src\t! xor vectors (4 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor   $dst,$src,$mem\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV dst src));
  format %{ "pxor    $dst,$src\t! xor vectors (8 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor   $dst,$src,$mem\t! xor vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV dst src));
  format %{ "pxor    $dst,$src\t! xor vectors (16 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor   $dst,$src,$mem\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor   $dst,$src,$mem\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor   $dst,$src,$mem\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
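
// Unlike the shift rules above, the AND/OR/XOR rules match on
// length_in_bytes() rather than element count: bitwise operations are
// element-size agnostic, so one rule per vector width covers bytes, shorts,
// ints and longs alike. A byte-wise C++ model therefore describes all of
// them (illustrative sketch only; ref_pbitop is a hypothetical name):
//
//   #include <stdint.h>
//   #include <stddef.h>
//
//   enum BitOp { BIT_AND, BIT_OR, BIT_XOR };
//
//   // Reference model of pand/por/pxor over an n-byte vector (n = 4..64 here).
//   static void ref_pbitop(uint8_t* dst, const uint8_t* src1, const uint8_t* src2,
//                          size_t n, BitOp op) {
//     for (size_t i = 0; i < n; i++) {
//       switch (op) {
//         case BIT_AND: dst[i] = src1[i] & src2[i]; break;
//         case BIT_OR:  dst[i] = src1[i] | src2[i]; break;
//         case BIT_XOR: dst[i] = src1[i] ^ src2[i]; break;
//       }
//     }
//   }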