//
// Copyright (c) 2011, 2016, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers. 512-bit registers, i.e. 16 32-bit words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre-EVEX-enabled architectures:
//   XMM8-XMM15 must be encoded with REX (VEX for UseAVX).
// For EVEX-enabled architectures:
//   XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
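//
// A worked reading of one entry (illustrative note): each lettered suffix
// names one 32-bit VMReg slot of the vector register, counted from its base
// with next(k). For example,
//   reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
// is the third 32-bit word of xmm0. Slots (a)-(d) cover the 128-bit XMM view,
// (a)-(h) the 256-bit YMM view, and (a)-(p) the full 512-bit ZMM view.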
//
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM31 preserved across function calls
//              XMM0-XMM3 might hold parameters

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

#ifdef _WIN64

reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(15));

reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(15));

#else // _WIN64

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

#ifdef _LP64

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
xmm23->as_VMReg()->next(8)); 924 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9)); 925 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10)); 926 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11)); 927 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12)); 928 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13)); 929 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14)); 930 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15)); 931 932 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()); 933 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1)); 934 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2)); 935 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3)); 936 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4)); 937 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5)); 938 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6)); 939 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7)); 940 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8)); 941 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9)); 942 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10)); 943 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11)); 944 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12)); 945 reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13)); 946 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14)); 947 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15)); 948 949 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()); 950 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1)); 951 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2)); 952 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3)); 953 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4)); 954 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5)); 955 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6)); 956 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7)); 957 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8)); 958 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9)); 959 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10)); 960 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11)); 961 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12)); 962 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13)); 963 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14)); 964 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15)); 965 966 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()); 967 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1)); 968 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2)); 969 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3)); 970 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4)); 971 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5)); 972 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6)); 973 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7)); 974 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8)); 975 reg_def XMM26j( SOC, SOC, Op_RegF, 26, 
xmm26->as_VMReg()->next(9)); 976 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10)); 977 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11)); 978 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12)); 979 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13)); 980 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14)); 981 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15)); 982 983 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()); 984 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1)); 985 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2)); 986 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3)); 987 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4)); 988 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5)); 989 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6)); 990 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7)); 991 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8)); 992 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9)); 993 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10)); 994 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11)); 995 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12)); 996 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13)); 997 reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14)); 998 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15)); 999 1000 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()); 1001 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1)); 1002 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2)); 1003 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3)); 1004 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4)); 1005 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5)); 1006 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6)); 1007 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7)); 1008 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8)); 1009 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9)); 1010 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10)); 1011 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11)); 1012 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12)); 1013 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13)); 1014 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14)); 1015 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15)); 1016 1017 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()); 1018 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1)); 1019 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2)); 1020 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3)); 1021 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4)); 1022 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5)); 1023 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6)); 1024 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7)); 1025 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8)); 1026 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9)); 1027 reg_def XMM29k( SOC, SOC, Op_RegF, 
29, xmm29->as_VMReg()->next(10)); 1028 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11)); 1029 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12)); 1030 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13)); 1031 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14)); 1032 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15)); 1033 1034 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()); 1035 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1)); 1036 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2)); 1037 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3)); 1038 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4)); 1039 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5)); 1040 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6)); 1041 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7)); 1042 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8)); 1043 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9)); 1044 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10)); 1045 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11)); 1046 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12)); 1047 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13)); 1048 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14)); 1049 reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15)); 1050 1051 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()); 1052 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1)); 1053 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2)); 1054 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3)); 1055 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4)); 1056 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5)); 1057 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6)); 1058 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7)); 1059 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8)); 1060 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9)); 1061 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10)); 1062 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11)); 1063 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12)); 1064 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13)); 1065 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14)); 1066 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15)); 1067 1068 #endif // _LP64 1069 1070 #endif // _WIN64 1071 1072 #ifdef _LP64 1073 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad()); 1074 #else 1075 reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad()); 1076 #endif // _LP64 1077 1078 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1079 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1080 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1081 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1082 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, 
XMM4o, XMM4p, 1083 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1084 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1085 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1086 #ifdef _LP64 1087 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1088 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1089 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1090 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1091 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1092 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1093 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1094 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1095 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 1096 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 1097 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 1098 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 1099 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 1100 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 1101 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 1102 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 1103 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 1104 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 1105 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 1106 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 1107 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 1108 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 1109 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1110 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1111 #endif 1112 ); 1113 1114 // flags 
allocation class should be last. 1115 alloc_class chunk2(RFLAGS); 1116 1117 // Singleton class for condition codes 1118 reg_class int_flags(RFLAGS); 1119 1120 // Class for pre evex float registers 1121 reg_class float_reg_legacy(XMM0, 1122 XMM1, 1123 XMM2, 1124 XMM3, 1125 XMM4, 1126 XMM5, 1127 XMM6, 1128 XMM7 1129 #ifdef _LP64 1130 ,XMM8, 1131 XMM9, 1132 XMM10, 1133 XMM11, 1134 XMM12, 1135 XMM13, 1136 XMM14, 1137 XMM15 1138 #endif 1139 ); 1140 1141 // Class for evex float registers 1142 reg_class float_reg_evex(XMM0, 1143 XMM1, 1144 XMM2, 1145 XMM3, 1146 XMM4, 1147 XMM5, 1148 XMM6, 1149 XMM7 1150 #ifdef _LP64 1151 ,XMM8, 1152 XMM9, 1153 XMM10, 1154 XMM11, 1155 XMM12, 1156 XMM13, 1157 XMM14, 1158 XMM15, 1159 XMM16, 1160 XMM17, 1161 XMM18, 1162 XMM19, 1163 XMM20, 1164 XMM21, 1165 XMM22, 1166 XMM23, 1167 XMM24, 1168 XMM25, 1169 XMM26, 1170 XMM27, 1171 XMM28, 1172 XMM29, 1173 XMM30, 1174 XMM31 1175 #endif 1176 ); 1177 1178 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} ); 1179 1180 // Class for pre evex double registers 1181 reg_class double_reg_legacy(XMM0, XMM0b, 1182 XMM1, XMM1b, 1183 XMM2, XMM2b, 1184 XMM3, XMM3b, 1185 XMM4, XMM4b, 1186 XMM5, XMM5b, 1187 XMM6, XMM6b, 1188 XMM7, XMM7b 1189 #ifdef _LP64 1190 ,XMM8, XMM8b, 1191 XMM9, XMM9b, 1192 XMM10, XMM10b, 1193 XMM11, XMM11b, 1194 XMM12, XMM12b, 1195 XMM13, XMM13b, 1196 XMM14, XMM14b, 1197 XMM15, XMM15b 1198 #endif 1199 ); 1200 1201 // Class for evex double registers 1202 reg_class double_reg_evex(XMM0, XMM0b, 1203 XMM1, XMM1b, 1204 XMM2, XMM2b, 1205 XMM3, XMM3b, 1206 XMM4, XMM4b, 1207 XMM5, XMM5b, 1208 XMM6, XMM6b, 1209 XMM7, XMM7b 1210 #ifdef _LP64 1211 ,XMM8, XMM8b, 1212 XMM9, XMM9b, 1213 XMM10, XMM10b, 1214 XMM11, XMM11b, 1215 XMM12, XMM12b, 1216 XMM13, XMM13b, 1217 XMM14, XMM14b, 1218 XMM15, XMM15b, 1219 XMM16, XMM16b, 1220 XMM17, XMM17b, 1221 XMM18, XMM18b, 1222 XMM19, XMM19b, 1223 XMM20, XMM20b, 1224 XMM21, XMM21b, 1225 XMM22, XMM22b, 1226 XMM23, XMM23b, 1227 XMM24, XMM24b, 1228 XMM25, XMM25b, 1229 XMM26, XMM26b, 1230 XMM27, XMM27b, 1231 XMM28, XMM28b, 1232 XMM29, XMM29b, 1233 XMM30, XMM30b, 1234 XMM31, XMM31b 1235 #endif 1236 ); 1237 1238 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} ); 1239 1240 // Class for pre evex 32bit vector registers 1241 reg_class vectors_reg_legacy(XMM0, 1242 XMM1, 1243 XMM2, 1244 XMM3, 1245 XMM4, 1246 XMM5, 1247 XMM6, 1248 XMM7 1249 #ifdef _LP64 1250 ,XMM8, 1251 XMM9, 1252 XMM10, 1253 XMM11, 1254 XMM12, 1255 XMM13, 1256 XMM14, 1257 XMM15 1258 #endif 1259 ); 1260 1261 // Class for evex 32bit vector registers 1262 reg_class vectors_reg_evex(XMM0, 1263 XMM1, 1264 XMM2, 1265 XMM3, 1266 XMM4, 1267 XMM5, 1268 XMM6, 1269 XMM7 1270 #ifdef _LP64 1271 ,XMM8, 1272 XMM9, 1273 XMM10, 1274 XMM11, 1275 XMM12, 1276 XMM13, 1277 XMM14, 1278 XMM15, 1279 XMM16, 1280 XMM17, 1281 XMM18, 1282 XMM19, 1283 XMM20, 1284 XMM21, 1285 XMM22, 1286 XMM23, 1287 XMM24, 1288 XMM25, 1289 XMM26, 1290 XMM27, 1291 XMM28, 1292 XMM29, 1293 XMM30, 1294 XMM31 1295 #endif 1296 ); 1297 1298 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} ); 1299 1300 // Class for all 64bit vector registers 1301 reg_class vectord_reg_legacy(XMM0, XMM0b, 1302 XMM1, XMM1b, 1303 XMM2, XMM2b, 1304 XMM3, XMM3b, 1305 XMM4, XMM4b, 1306 XMM5, XMM5b, 1307 XMM6, XMM6b, 1308 XMM7, XMM7b 1309 #ifdef _LP64 1310 ,XMM8, XMM8b, 1311 XMM9, XMM9b, 1312 XMM10, XMM10b, 1313 XMM11, XMM11b, 1314 XMM12, XMM12b, 1315 XMM13, XMM13b, 1316 
XMM14, XMM14b, 1317 XMM15, XMM15b 1318 #endif 1319 ); 1320 1321 // Class for all 64bit vector registers 1322 reg_class vectord_reg_evex(XMM0, XMM0b, 1323 XMM1, XMM1b, 1324 XMM2, XMM2b, 1325 XMM3, XMM3b, 1326 XMM4, XMM4b, 1327 XMM5, XMM5b, 1328 XMM6, XMM6b, 1329 XMM7, XMM7b 1330 #ifdef _LP64 1331 ,XMM8, XMM8b, 1332 XMM9, XMM9b, 1333 XMM10, XMM10b, 1334 XMM11, XMM11b, 1335 XMM12, XMM12b, 1336 XMM13, XMM13b, 1337 XMM14, XMM14b, 1338 XMM15, XMM15b, 1339 XMM16, XMM16b, 1340 XMM17, XMM17b, 1341 XMM18, XMM18b, 1342 XMM19, XMM19b, 1343 XMM20, XMM20b, 1344 XMM21, XMM21b, 1345 XMM22, XMM22b, 1346 XMM23, XMM23b, 1347 XMM24, XMM24b, 1348 XMM25, XMM25b, 1349 XMM26, XMM26b, 1350 XMM27, XMM27b, 1351 XMM28, XMM28b, 1352 XMM29, XMM29b, 1353 XMM30, XMM30b, 1354 XMM31, XMM31b 1355 #endif 1356 ); 1357 1358 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} ); 1359 1360 // Class for all 128bit vector registers 1361 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, 1362 XMM1, XMM1b, XMM1c, XMM1d, 1363 XMM2, XMM2b, XMM2c, XMM2d, 1364 XMM3, XMM3b, XMM3c, XMM3d, 1365 XMM4, XMM4b, XMM4c, XMM4d, 1366 XMM5, XMM5b, XMM5c, XMM5d, 1367 XMM6, XMM6b, XMM6c, XMM6d, 1368 XMM7, XMM7b, XMM7c, XMM7d 1369 #ifdef _LP64 1370 ,XMM8, XMM8b, XMM8c, XMM8d, 1371 XMM9, XMM9b, XMM9c, XMM9d, 1372 XMM10, XMM10b, XMM10c, XMM10d, 1373 XMM11, XMM11b, XMM11c, XMM11d, 1374 XMM12, XMM12b, XMM12c, XMM12d, 1375 XMM13, XMM13b, XMM13c, XMM13d, 1376 XMM14, XMM14b, XMM14c, XMM14d, 1377 XMM15, XMM15b, XMM15c, XMM15d 1378 #endif 1379 ); 1380 1381 // Class for all 128bit vector registers 1382 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, 1383 XMM1, XMM1b, XMM1c, XMM1d, 1384 XMM2, XMM2b, XMM2c, XMM2d, 1385 XMM3, XMM3b, XMM3c, XMM3d, 1386 XMM4, XMM4b, XMM4c, XMM4d, 1387 XMM5, XMM5b, XMM5c, XMM5d, 1388 XMM6, XMM6b, XMM6c, XMM6d, 1389 XMM7, XMM7b, XMM7c, XMM7d 1390 #ifdef _LP64 1391 ,XMM8, XMM8b, XMM8c, XMM8d, 1392 XMM9, XMM9b, XMM9c, XMM9d, 1393 XMM10, XMM10b, XMM10c, XMM10d, 1394 XMM11, XMM11b, XMM11c, XMM11d, 1395 XMM12, XMM12b, XMM12c, XMM12d, 1396 XMM13, XMM13b, XMM13c, XMM13d, 1397 XMM14, XMM14b, XMM14c, XMM14d, 1398 XMM15, XMM15b, XMM15c, XMM15d, 1399 XMM16, XMM16b, XMM16c, XMM16d, 1400 XMM17, XMM17b, XMM17c, XMM17d, 1401 XMM18, XMM18b, XMM18c, XMM18d, 1402 XMM19, XMM19b, XMM19c, XMM19d, 1403 XMM20, XMM20b, XMM20c, XMM20d, 1404 XMM21, XMM21b, XMM21c, XMM21d, 1405 XMM22, XMM22b, XMM22c, XMM22d, 1406 XMM23, XMM23b, XMM23c, XMM23d, 1407 XMM24, XMM24b, XMM24c, XMM24d, 1408 XMM25, XMM25b, XMM25c, XMM25d, 1409 XMM26, XMM26b, XMM26c, XMM26d, 1410 XMM27, XMM27b, XMM27c, XMM27d, 1411 XMM28, XMM28b, XMM28c, XMM28d, 1412 XMM29, XMM29b, XMM29c, XMM29d, 1413 XMM30, XMM30b, XMM30c, XMM30d, 1414 XMM31, XMM31b, XMM31c, XMM31d 1415 #endif 1416 ); 1417 1418 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} ); 1419 1420 // Class for all 256bit vector registers 1421 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1422 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1423 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1424 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 1425 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1426 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1427 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1428 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1429 #ifdef _LP64 1430 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1431 XMM9, XMM9b, 
XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1432 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1433 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1434 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1435 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1436 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1437 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h 1438 #endif 1439 ); 1440 1441 // Class for all 256bit vector registers 1442 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1443 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1444 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1445 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 1446 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1447 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1448 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1449 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1450 #ifdef _LP64 1451 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1452 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1453 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1454 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1455 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1456 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1457 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1458 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, 1459 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, 1460 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, 1461 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, 1462 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, 1463 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, 1464 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, 1465 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, 1466 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, 1467 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, 1468 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, 1469 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, 1470 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, 1471 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, 1472 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, 1473 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, 1474 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h 1475 #endif 1476 ); 1477 1478 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} ); 1479 1480 // Class for all 512bit vector registers 1481 reg_class vectorz_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1482 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1483 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1484 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1485 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1486 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, 
XMM5o, XMM5p, 1487 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1488 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1489 #ifdef _LP64 1490 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1491 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1492 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1493 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1494 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1495 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1496 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1497 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1498 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 1499 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 1500 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 1501 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 1502 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 1503 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 1504 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 1505 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 1506 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 1507 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 1508 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 1509 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 1510 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 1511 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 1512 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1513 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1514 #endif 1515 ); 1516 1517 %} 1518 1519 1520 //----------SOURCE BLOCK------------------------------------------------------- 1521 // This is a block of 
C++ code which provides values, functions, and 1522 // definitions necessary in the rest of the architecture description 1523 1524 source_hpp %{ 1525 // Header information of the source block. 1526 // Method declarations/definitions which are used outside 1527 // the ad-scope can conveniently be defined here. 1528 // 1529 // To keep related declarations/definitions/uses close together, 1530 // we switch between source %{ %} and source_hpp %{ %} freely as needed. 1531 1532 class NativeJump; 1533 1534 class CallStubImpl { 1535 1536 //-------------------------------------------------------------- 1537 //---< Used for optimization in Compile::shorten_branches >--- 1538 //-------------------------------------------------------------- 1539 1540 public: 1541 // Size of call trampoline stub. 1542 static uint size_call_trampoline() { 1543 return 0; // no call trampolines on this platform 1544 } 1545 1546 // Number of relocations needed by a call trampoline stub. 1547 static uint reloc_call_trampoline() { 1548 return 0; // no call trampolines on this platform 1549 } 1550 }; 1551 1552 class HandlerImpl { 1553 1554 public: 1555 1556 static int emit_exception_handler(CodeBuffer& cbuf); 1557 static int emit_deopt_handler(CodeBuffer& cbuf); 1558 1559 static uint size_exception_handler() { 1560 // NativeCall instruction size is the same as NativeJump. 1561 // The exception handler starts out as a jump and can be patched to 1562 // a call by deoptimization. (4932387) 1563 // Note that this value is also credited (in output.cpp) to 1564 // the size of the code section. 1565 return NativeJump::instruction_size; 1566 } 1567 1568 #ifdef _LP64 1569 static uint size_deopt_handler() { 1570 // three 5-byte instructions 1571 return 15; 1572 } 1573 #else 1574 static uint size_deopt_handler() { 1575 // NativeCall instruction size is the same as NativeJump. 1576 // The exception handler starts out as a jump and can be patched to 1577 // a call by deoptimization. (4932387) 1578 // Note that this value is also credited (in output.cpp) to 1579 // the size of the code section. 1580 return 5 + NativeJump::instruction_size; // pushl(); jmp; 1581 } 1582 #endif 1583 }; 1584 1585 %} // end source_hpp 1586 1587 source %{ 1588 1589 #include "opto/addnode.hpp" 1590 1591 // Emit exception handler code. 1592 // Stuff framesize into a register and call a VM stub routine. 1593 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { 1594 1595 // Note that the code buffer's insts_mark is always relative to insts. 1596 // That's why we must use the macroassembler to generate a handler. 1597 MacroAssembler _masm(&cbuf); 1598 address base = __ start_a_stub(size_exception_handler()); 1599 if (base == NULL) { 1600 ciEnv::current()->record_failure("CodeCache is full"); 1601 return 0; // CodeBuffer::expand failed 1602 } 1603 int offset = __ offset(); 1604 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); 1605 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); 1606 __ end_a_stub(); 1607 return offset; 1608 } 1609 1610 // Emit deopt handler code. 1611 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { 1612 1613 // Note that the code buffer's insts_mark is always relative to insts. 1614 // That's why we must use the macroassembler to generate a handler.
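// The 64-bit path below emits exactly what size_deopt_handler() budgets for:
// three 5-byte instructions. A sketch of the resulting stub (assumed
// encodings; the actual bytes come from the MacroAssembler):
//   call next                    ; pushes the address of 'next' (= the_pc + 5)
//   subq [rsp], 5                ; rewinds the pushed address back to the_pc
//   jmp  deopt_blob()->unpack()  ; tail-jump into the deoptimization blob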
1615 MacroAssembler _masm(&cbuf); 1616 address base = __ start_a_stub(size_deopt_handler()); 1617 if (base == NULL) { 1618 ciEnv::current()->record_failure("CodeCache is full"); 1619 return 0; // CodeBuffer::expand failed 1620 } 1621 int offset = __ offset(); 1622 1623 #ifdef _LP64 1624 address the_pc = (address) __ pc(); 1625 Label next; 1626 // push a "the_pc" on the stack without destroying any registers 1627 // as they all may be live. 1628 1629 // push address of "next" 1630 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32 1631 __ bind(next); 1632 // adjust it so it matches "the_pc" 1633 __ subptr(Address(rsp, 0), __ offset() - offset); 1634 #else 1635 InternalAddress here(__ pc()); 1636 __ pushptr(here.addr()); 1637 #endif 1638 1639 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); 1640 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); 1641 __ end_a_stub(); 1642 return offset; 1643 } 1644 1645 1646 //============================================================================= 1647 1648 // Float masks come from different places depending on platform. 1649 #ifdef _LP64 1650 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 1651 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 1652 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 1653 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 1654 #else 1655 static address float_signmask() { return (address)float_signmask_pool; } 1656 static address float_signflip() { return (address)float_signflip_pool; } 1657 static address double_signmask() { return (address)double_signmask_pool; } 1658 static address double_signflip() { return (address)double_signflip_pool; } 1659 #endif 1660 1661 1662 const bool Matcher::match_rule_supported(Opcodes opcode) { 1663 if (!has_match_rule(opcode)) 1664 return false; 1665 1666 bool ret_value = true; 1667 switch (opcode) { 1668 case Opcodes::Op_PopCountI: 1669 case Opcodes::Op_PopCountL: 1670 if (!UsePopCountInstruction) 1671 ret_value = false; 1672 break; 1673 case Opcodes::Op_MulVI: 1674 if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX 1675 ret_value = false; 1676 break; 1677 case Opcodes::Op_MulVL: 1678 case Opcodes::Op_MulReductionVL: 1679 if (VM_Version::supports_avx512dq() == false) 1680 ret_value = false; 1681 break; 1682 case Opcodes::Op_AddReductionVL: 1683 if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here 1684 ret_value = false; 1685 break; 1686 case Opcodes::Op_AddReductionVI: 1687 if (UseSSE < 3) // requires at least SSE3 1688 ret_value = false; 1689 break; 1690 case Opcodes::Op_MulReductionVI: 1691 if (UseSSE < 4) // requires at least SSE4 1692 ret_value = false; 1693 break; 1694 case Opcodes::Op_AddReductionVF: 1695 case Opcodes::Op_AddReductionVD: 1696 case Opcodes::Op_MulReductionVF: 1697 case Opcodes::Op_MulReductionVD: 1698 if (UseSSE < 1) // requires at least SSE 1699 ret_value = false; 1700 break; 1701 case Opcodes::Op_SqrtVD: 1702 if (UseAVX < 1) // enabled for AVX only 1703 ret_value = false; 1704 break; 1705 case Opcodes::Op_CompareAndSwapL: 1706 #ifdef _LP64 1707 case Opcodes::Op_CompareAndSwapP: 1708 #endif 1709 if (!VM_Version::supports_cx8()) 1710 ret_value = false; 1711 break; 1712 case Opcodes::Op_CMoveVD: 1713 if (UseAVX < 1 || UseAVX > 2) 1714 ret_value = false; 1715 break; 1716 case Opcodes::Op_StrIndexOf: 1717 if (!UseSSE42Intrinsics) 1718 ret_value = false; 
1719 break; 1720 case Opcodes::Op_StrIndexOfChar: 1721 if (!UseSSE42Intrinsics) 1722 ret_value = false; 1723 break; 1724 case Opcodes::Op_OnSpinWait: 1725 if (VM_Version::supports_on_spin_wait() == false) 1726 ret_value = false; 1727 break; 1728 } 1729 1730 return ret_value; // By default, match rules are supported. 1731 } 1732 1733 const bool Matcher::match_rule_supported_vector(Opcodes opcode, int vlen) { 1734 // Identify extra cases that we might want to provide match rules for, 1735 // e.g. Op_ vector nodes and other intrinsics, while guarding with vlen. 1736 bool ret_value = match_rule_supported(opcode); 1737 if (ret_value) { 1738 switch (opcode) { 1739 case Opcodes::Op_AddVB: 1740 case Opcodes::Op_SubVB: 1741 if ((vlen == 64) && (VM_Version::supports_avx512bw() == false)) 1742 ret_value = false; 1743 break; 1744 case Opcodes::Op_URShiftVS: 1745 case Opcodes::Op_RShiftVS: 1746 case Opcodes::Op_LShiftVS: 1747 case Opcodes::Op_MulVS: 1748 case Opcodes::Op_AddVS: 1749 case Opcodes::Op_SubVS: 1750 if ((vlen == 32) && (VM_Version::supports_avx512bw() == false)) 1751 ret_value = false; 1752 break; 1753 case Opcodes::Op_CMoveVD: 1754 if (vlen != 4) 1755 ret_value = false; 1756 break; 1757 } 1758 } 1759 1760 return ret_value; // By default, match rules are supported. 1761 } 1762 1763 const bool Matcher::has_predicated_vectors(void) { 1764 bool ret_value = false; 1765 if (UseAVX > 2) { 1766 ret_value = VM_Version::supports_avx512vl(); 1767 } 1768 1769 return ret_value; 1770 } 1771 1772 const int Matcher::float_pressure(int default_pressure_threshold) { 1773 int float_pressure_threshold = default_pressure_threshold; 1774 #ifdef _LP64 1775 if (UseAVX > 2) { 1776 // Increase the pressure threshold on machines with AVX3, which have 1777 // 2x more XMM registers. 1778 float_pressure_threshold = default_pressure_threshold * 2; 1779 } 1780 #endif 1781 return float_pressure_threshold; 1782 } 1783 1784 // Max vector size in bytes. 0 if not supported. 1785 const int Matcher::vector_width_in_bytes(BasicType bt) { 1786 assert(is_java_primitive(bt), "only primitive type vectors"); 1787 if (UseSSE < 2) return 0; 1788 // SSE2 supports 128bit vectors for all types. 1789 // AVX2 supports 256bit vectors for all types. 1790 // EVEX (AVX-512) supports 512bit vectors for all types. 1791 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16; // e.g. UseAVX == 2 -> 32 bytes, UseAVX == 3 -> 64 bytes 1792 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. 1793 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) 1794 size = (UseAVX > 2) ? 64 : 32; 1795 // Use flag to limit vector size. 1796 size = MIN2(size,(int)MaxVectorSize); 1797 // Minimum 2 values in vector (or 4 for bytes). 1798 switch (bt) { 1799 case T_DOUBLE: 1800 case T_LONG: 1801 if (size < 16) return 0; 1802 break; 1803 case T_FLOAT: 1804 case T_INT: 1805 if (size < 8) return 0; 1806 break; 1807 case T_BOOLEAN: 1808 if (size < 4) return 0; 1809 break; 1810 case T_CHAR: 1811 if (size < 4) return 0; 1812 break; 1813 case T_BYTE: 1814 if (size < 4) return 0; 1815 break; 1816 case T_SHORT: 1817 if (size < 4) return 0; 1818 break; 1819 default: 1820 ShouldNotReachHere(); 1821 } 1822 return size; 1823 } 1824 1825 // Limits on vector size (number of elements) loaded into vector. 1826 const int Matcher::max_vector_size(const BasicType bt) { 1827 return vector_width_in_bytes(bt)/type2aelembytes(bt); 1828 } 1829 const int Matcher::min_vector_size(const BasicType bt) { 1830 int max_size = max_vector_size(bt); 1831 // Min size which can be loaded into vector is 4 bytes. 1832 int size = (type2aelembytes(bt) == 1) ?
4 : 2; 1833 return MIN2(size,max_size); 1834 } 1835 1836 // Vector ideal reg corresponding to specified size in bytes 1837 const Opcodes Matcher::vector_ideal_reg(int size) { 1838 assert(MaxVectorSize >= size, ""); 1839 switch (size) { 1840 case 4: return Opcodes::Op_VecS; 1841 case 8: return Opcodes::Op_VecD; 1842 case 16: return Opcodes::Op_VecX; 1843 case 32: return Opcodes::Op_VecY; 1844 case 64: return Opcodes::Op_VecZ; 1845 } 1846 ShouldNotReachHere(); 1847 return Opcodes::Op_Node; 1848 } 1849 1850 // Only the lowest bits of the xmm reg are used for the vector shift count. 1851 const Opcodes Matcher::vector_shift_count_ideal_reg(int size) { 1852 return Opcodes::Op_VecS; 1853 } 1854 1855 // x86 supports misaligned vector stores/loads. 1856 const bool Matcher::misaligned_vectors_ok() { 1857 return !AlignVector; // can be changed by flag 1858 } 1859 1860 // x86 AES instructions are compatible with SunJCE expanded 1861 // keys, hence we do not need to pass the original key to stubs. 1862 const bool Matcher::pass_original_key_for_aes() { 1863 return false; 1864 } 1865 1866 1867 const bool Matcher::convi2l_type_required = true; 1868 1869 // Check for shift by small constant as well 1870 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) { 1871 if (shift->Opcode() == Opcodes::Op_LShiftX && shift->in(2)->is_Con() && 1872 shift->in(2)->get_int() <= 3 && 1873 // Are there other uses besides address expressions? 1874 !matcher->is_visited(shift)) { 1875 address_visited.set(shift->_idx); // Flag as address_visited 1876 mstack.push(shift->in(2), Matcher::Visit); 1877 Node *conv = shift->in(1); 1878 #ifdef _LP64 1879 // Allow the Matcher to match the rule which bypasses the 1880 // ConvI2L operation for an array index on LP64 1881 // if the index value is positive. 1882 if (conv->Opcode() == Opcodes::Op_ConvI2L && 1883 conv->as_Type()->type()->is_long()->_lo >= 0 && 1884 // Are there other uses besides address expressions? 1885 !matcher->is_visited(conv)) { 1886 address_visited.set(conv->_idx); // Flag as address_visited 1887 mstack.push(conv->in(1), Matcher::Pre_Visit); 1888 } else 1889 #endif 1890 mstack.push(conv, Matcher::Pre_Visit); 1891 return true; 1892 } 1893 return false; 1894 } 1895 1896 // Should the Matcher clone shifts on addressing modes, expecting them 1897 // to be subsumed into complex addressing expressions, or should it compute them 1898 // into registers? E.g. (base + (index << 2)) + disp folds into a single [base + index*4 + disp] addressing operand. 1899 bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { 1900 Node *off = m->in(AddPNode::Offset); 1901 if (off->is_Con()) { 1902 address_visited.test_set(m->_idx); // Flag as address_visited 1903 Node *adr = m->in(AddPNode::Address); 1904 1905 // Intel can handle 2 adds in the addressing mode. 1906 // AtomicAdd is not an addressing expression. 1907 // Cheap to find it by looking for screwy base. 1908 if (adr->is_AddP() && 1909 !adr->in(AddPNode::Base)->is_top() && 1910 // Are there other uses besides address expressions?
1911 !is_visited(adr)) { 1912 address_visited.set(adr->_idx); // Flag as address_visited 1913 Node *shift = adr->in(AddPNode::Offset); 1914 if (!clone_shift(shift, this, mstack, address_visited)) { 1915 mstack.push(shift, Pre_Visit); 1916 } 1917 mstack.push(adr->in(AddPNode::Address), Pre_Visit); 1918 mstack.push(adr->in(AddPNode::Base), Pre_Visit); 1919 } else { 1920 mstack.push(adr, Pre_Visit); 1921 } 1922 1923 // Clone X+offset as it also folds into most addressing expressions. 1924 mstack.push(off, Visit); 1925 mstack.push(m->in(AddPNode::Base), Pre_Visit); 1926 return true; 1927 } else if (clone_shift(off, this, mstack, address_visited)) { 1928 address_visited.test_set(m->_idx); // Flag as address_visited 1929 mstack.push(m->in(AddPNode::Address), Pre_Visit); 1930 mstack.push(m->in(AddPNode::Base), Pre_Visit); 1931 return true; 1932 } 1933 return false; 1934 } 1935 1936 void Compile::reshape_address(AddPNode* addp) { 1937 } 1938 1939 // Helper methods for MachSpillCopyNode::implementation(). 1940 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 1941 int src_hi, int dst_hi, Opcodes ireg, outputStream* st) { 1942 // In the 64-bit VM, size calculation is very complex, so the size is 1943 // obtained by emitting the instructions into a scratch buffer. 1944 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 1945 assert(ireg == Opcodes::Op_VecS || // 32bit vector 1946 (src_lo & 1) == 0 && (src_lo + 1) == src_hi && 1947 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi, 1948 "no non-adjacent vector moves" ); 1949 if (cbuf) { 1950 MacroAssembler _masm(cbuf); 1951 int offset = __ offset(); 1952 switch (ireg) { 1953 case Opcodes::Op_VecS: // copy whole register 1954 case Opcodes::Op_VecD: 1955 case Opcodes::Op_VecX: 1956 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1957 break; 1958 case Opcodes::Op_VecY: 1959 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1960 break; 1961 case Opcodes::Op_VecZ: 1962 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2); 1963 break; 1964 default: 1965 ShouldNotReachHere(); 1966 } 1967 int size = __ offset() - offset; 1968 #ifdef ASSERT 1969 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as the SIMD prefix. 1970 assert(!do_size || size == 4, "incorrect size calculation"); 1971 #endif 1972 return size; 1973 #ifndef PRODUCT 1974 } else if (!do_size) { 1975 switch (ireg) { 1976 case Opcodes::Op_VecS: 1977 case Opcodes::Op_VecD: 1978 case Opcodes::Op_VecX: 1979 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 1980 break; 1981 case Opcodes::Op_VecY: 1982 case Opcodes::Op_VecZ: 1983 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 1984 break; 1985 default: 1986 ShouldNotReachHere(); 1987 } 1988 #endif 1989 } 1990 // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as the SIMD prefix. 1991 return (UseAVX > 2) ? 6 : 4; 1992 } 1993 1994 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, 1995 int stack_offset, int reg, Opcodes ireg, outputStream* st) { 1996 // In the 64-bit VM, size calculation is very complex, so the size is 1997 // obtained by emitting the instructions into a scratch buffer.
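// The move instruction is selected by the ideal register type of the value
// being spilled: Op_VecS uses movdl (32-bit), Op_VecD uses movq (64-bit),
// Op_VecX uses movdqu (128-bit), Op_VecY uses vmovdqu (256-bit), and
// Op_VecZ uses evmovdquq (512-bit), as the load/store switches below show.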
1998 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 1999 if (cbuf) { 2000 MacroAssembler _masm(cbuf); 2001 int offset = __ offset(); 2002 if (is_load) { 2003 switch (ireg) { 2004 case Opcodes::Op_VecS: 2005 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2006 break; 2007 case Opcodes::Op_VecD: 2008 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2009 break; 2010 case Opcodes::Op_VecX: 2011 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2012 break; 2013 case Opcodes::Op_VecY: 2014 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2015 break; 2016 case Opcodes::Op_VecZ: 2017 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); 2018 break; 2019 default: 2020 ShouldNotReachHere(); 2021 } 2022 } else { // store 2023 switch (ireg) { 2024 case Opcodes::Op_VecS: 2025 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2026 break; 2027 case Opcodes::Op_VecD: 2028 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2029 break; 2030 case Opcodes::Op_VecX: 2031 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2032 break; 2033 case Opcodes::Op_VecY: 2034 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2035 break; 2036 case Opcodes::Op_VecZ: 2037 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2038 break; 2039 default: 2040 ShouldNotReachHere(); 2041 } 2042 } 2043 int size = __ offset() - offset; 2044 #ifdef ASSERT 2045 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4); 2046 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 
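// For example (a sketch, assuming a VEX-encoded movdqu with an rsp-relative
// address): 2 prefix bytes + 1 opcode byte + ModRM + SIB make up the base 5
// bytes, and 0 < stack_offset < 0x80 adds a 1-byte displacement, so the
// emitted size should equal 5 + offset_size = 6.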
2047 assert(!do_size || size == (5+offset_size), "incorrect size calculation"); 2048 #endif 2049 return size; 2050 #ifndef PRODUCT 2051 } else if (!do_size) { 2052 if (is_load) { 2053 switch (ireg) { 2054 case Opcodes::Op_VecS: 2055 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2056 break; 2057 case Opcodes::Op_VecD: 2058 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2059 break; 2060 case Opcodes::Op_VecX: 2061 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2062 break; 2063 case Opcodes::Op_VecY: 2064 case Opcodes::Op_VecZ: 2065 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2066 break; 2067 default: 2068 ShouldNotReachHere(); 2069 } 2070 } else { // store 2071 switch (ireg) { 2072 case Opcodes::Op_VecS: 2073 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2074 break; 2075 case Opcodes::Op_VecD: 2076 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2077 break; 2078 case Opcodes::Op_VecX: 2079 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2080 break; 2081 case Opcodes::Op_VecY: 2082 case Opcodes::Op_VecZ: 2083 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2084 break; 2085 default: 2086 ShouldNotReachHere(); 2087 } 2088 } 2089 #endif 2090 } 2091 bool is_single_byte = false; 2092 int vec_len = 0; 2093 if ((UseAVX > 2) && (stack_offset != 0)) { 2094 int tuple_type = Assembler::EVEX_FVM; 2095 int input_size = Assembler::EVEX_32bit; 2096 switch (ireg) { 2097 case Opcodes::Op_VecS: 2098 tuple_type = Assembler::EVEX_T1S; 2099 break; 2100 case Opcodes::Op_VecD: 2101 tuple_type = Assembler::EVEX_T1S; 2102 input_size = Assembler::EVEX_64bit; 2103 break; 2104 case Opcodes::Op_VecX: 2105 break; 2106 case Opcodes::Op_VecY: 2107 vec_len = 1; 2108 break; 2109 case Opcodes::Op_VecZ: 2110 vec_len = 2; 2111 break; 2112 } 2113 is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0); 2114 } 2115 int offset_size = 0; 2116 int size = 5; 2117 if (UseAVX > 2) { 2118 if (VM_Version::supports_avx512novl() && (vec_len == 2)) { 2119 offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); 2120 size += 2; // Need an additional two bytes for EVEX encoding 2121 } else if (VM_Version::supports_avx512novl() && (vec_len < 2)) { 2122 offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4); 2123 } else { 2124 offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); 2125 size += 2; // Need an additional two bytes for EVEX encoding 2126 } 2127 } else { 2128 offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4); 2129 } 2130 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as the SIMD prefix. 2131 return size+offset_size; 2132 } 2133 2134 static inline jfloat replicate4_imm(int con, int width) { 2135 // Load a constant of "width" (in bytes) and replicate it to fill 32 bits.
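// Worked example of the replication loop below: replicate4_imm(0x8F, 1)
// masks the constant to 0x8F, then doubles the pattern width each pass,
// 0x8F -> 0x8F8F -> 0x8F8F8F8F, and returns the resulting 32-bit pattern
// reinterpreted as a jfloat. (0x8F is an arbitrary illustrative constant.)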
2136 assert(width == 1 || width == 2, "only byte or short types here"); 2137 int bit_width = width * 8; 2138 jint val = con; 2139 val &= (1 << bit_width) - 1; // mask off sign bits 2140 while (bit_width < 32) { 2141 val |= (val << bit_width); 2142 bit_width <<= 1; 2143 } 2144 jfloat fval = *((jfloat*) &val); // coerce to float type 2145 return fval; 2146 } 2147 2148 static inline jdouble replicate8_imm(int con, int width) { 2149 // Load a constant of "width" (in bytes) and replicate it to fill 64 bits. 2150 assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here"); 2151 int bit_width = width * 8; 2152 jlong val = con; 2153 val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits 2154 while (bit_width < 64) { 2155 val |= (val << bit_width); 2156 bit_width <<= 1; 2157 } 2158 jdouble dval = *((jdouble*) &val); // coerce to double type 2159 return dval; 2160 } 2161 2162 #ifndef PRODUCT 2163 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 2164 st->print("nop \t# %d bytes pad for loops and calls", _count); 2165 } 2166 #endif 2167 2168 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { 2169 MacroAssembler _masm(&cbuf); 2170 __ nop(_count); 2171 } 2172 2173 uint MachNopNode::size(PhaseRegAlloc*) const { 2174 return _count; 2175 } 2176 2177 #ifndef PRODUCT 2178 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 2179 st->print("# breakpoint"); 2180 } 2181 #endif 2182 2183 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { 2184 MacroAssembler _masm(&cbuf); 2185 __ int3(); 2186 } 2187 2188 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 2189 return MachNode::size(ra_); 2190 } 2191 2192 %} 2193 2194 encode %{ 2195 2196 enc_class call_epilog %{ 2197 if (VerifyStackAtCalls) { 2198 // Check that stack depth is unchanged: find majik cookie on stack 2199 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 2200 MacroAssembler _masm(&cbuf); 2201 Label L; 2202 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); 2203 __ jccb(Assembler::equal, L); 2204 // Die if stack mismatch 2205 __ int3(); 2206 __ bind(L); 2207 } 2208 %} 2209 2210 %} 2211 2212 2213 //----------OPERANDS----------------------------------------------------------- 2214 // Operand definitions must precede instruction definitions for correct parsing 2215 // in the ADLC because operands constitute user-defined types which are used in 2216 // instruction definitions. 2217 2218 // This operand applies only to EVEX, so there is only one version. 2219 operand vecZ() %{ 2220 constraint(ALLOC_IN_RC(vectorz_reg)); 2221 match(VecZ); 2222 2223 format %{ %} 2224 interface(REG_INTER); 2225 %} 2226 2227 // Comparison Code for FP conditional move 2228 operand cmpOp_vcmppd() %{ 2229 match(Bool); 2230 2231 predicate(n->as_Bool()->_test._test != BoolTest::overflow && 2232 n->as_Bool()->_test._test != BoolTest::no_overflow); 2233 format %{ "" %} 2234 interface(COND_INTER) %{ 2235 equal (0x0, "eq"); 2236 less (0x1, "lt"); 2237 less_equal (0x2, "le"); 2238 not_equal (0xC, "ne"); 2239 greater_equal(0xD, "ge"); 2240 greater (0xE, "gt"); 2241 // TODO: adlc cannot compile this operand without the next two lines; it fails with: 2242 // x86_64.ad(13987) Syntax Error: :In operand cmpOp_vcmppd: Do not support this encode constant: ' %{ 2243 // equal' for overflow.
2244 overflow (0x20, "o"); // not really supported by the instruction 2245 no_overflow (0x21, "no"); // not really supported by the instruction 2246 %} 2247 %} 2248 2249 2250 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 2251 2252 // ============================================================================ 2253 2254 instruct ShouldNotReachHere() %{ 2255 match(Halt); 2256 format %{ "int3\t# ShouldNotReachHere" %} 2257 ins_encode %{ 2258 __ int3(); 2259 %} 2260 ins_pipe(pipe_slow); 2261 %} 2262 2263 // =================================EVEX special=============================== 2264 2265 instruct setMask(rRegI dst, rRegI src) %{ 2266 predicate(Matcher::has_predicated_vectors()); 2267 match(Set dst (SetVectMaskI src)); 2268 effect(TEMP dst); 2269 format %{ "setvectmask $dst, $src" %} 2270 ins_encode %{ 2271 __ setvectmask($dst$$Register, $src$$Register); 2272 %} 2273 ins_pipe(pipe_slow); 2274 %} 2275 2276 // ============================================================================ 2277 2278 instruct addF_reg(regF dst, regF src) %{ 2279 predicate((UseSSE>=1) && (UseAVX == 0)); 2280 match(Set dst (AddF dst src)); 2281 2282 format %{ "addss $dst, $src" %} 2283 ins_cost(150); 2284 ins_encode %{ 2285 __ addss($dst$$XMMRegister, $src$$XMMRegister); 2286 %} 2287 ins_pipe(pipe_slow); 2288 %} 2289 2290 instruct addF_mem(regF dst, memory src) %{ 2291 predicate((UseSSE>=1) && (UseAVX == 0)); 2292 match(Set dst (AddF dst (LoadF src))); 2293 2294 format %{ "addss $dst, $src" %} 2295 ins_cost(150); 2296 ins_encode %{ 2297 __ addss($dst$$XMMRegister, $src$$Address); 2298 %} 2299 ins_pipe(pipe_slow); 2300 %} 2301 2302 instruct addF_imm(regF dst, immF con) %{ 2303 predicate((UseSSE>=1) && (UseAVX == 0)); 2304 match(Set dst (AddF dst con)); 2305 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2306 ins_cost(150); 2307 ins_encode %{ 2308 __ addss($dst$$XMMRegister, $constantaddress($con)); 2309 %} 2310 ins_pipe(pipe_slow); 2311 %} 2312 2313 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 2314 predicate(UseAVX > 0); 2315 match(Set dst (AddF src1 src2)); 2316 2317 format %{ "vaddss $dst, $src1, $src2" %} 2318 ins_cost(150); 2319 ins_encode %{ 2320 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2321 %} 2322 ins_pipe(pipe_slow); 2323 %} 2324 2325 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 2326 predicate(UseAVX > 0); 2327 match(Set dst (AddF src1 (LoadF src2))); 2328 2329 format %{ "vaddss $dst, $src1, $src2" %} 2330 ins_cost(150); 2331 ins_encode %{ 2332 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2333 %} 2334 ins_pipe(pipe_slow); 2335 %} 2336 2337 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 2338 predicate(UseAVX > 0); 2339 match(Set dst (AddF src con)); 2340 2341 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2342 ins_cost(150); 2343 ins_encode %{ 2344 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2345 %} 2346 ins_pipe(pipe_slow); 2347 %} 2348 2349 instruct addD_reg(regD dst, regD src) %{ 2350 predicate((UseSSE>=2) && (UseAVX == 0)); 2351 match(Set dst (AddD dst src)); 2352 2353 format %{ "addsd $dst, $src" %} 2354 ins_cost(150); 2355 ins_encode %{ 2356 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 2357 %} 2358 ins_pipe(pipe_slow); 2359 %} 2360 2361 instruct addD_mem(regD dst, memory src) %{ 2362 predicate((UseSSE>=2) && (UseAVX == 0)); 2363 match(Set dst (AddD dst (LoadD src))); 2364 
2365 format %{ "addsd $dst, $src" %} 2366 ins_cost(150); 2367 ins_encode %{ 2368 __ addsd($dst$$XMMRegister, $src$$Address); 2369 %} 2370 ins_pipe(pipe_slow); 2371 %} 2372 2373 instruct addD_imm(regD dst, immD con) %{ 2374 predicate((UseSSE>=2) && (UseAVX == 0)); 2375 match(Set dst (AddD dst con)); 2376 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2377 ins_cost(150); 2378 ins_encode %{ 2379 __ addsd($dst$$XMMRegister, $constantaddress($con)); 2380 %} 2381 ins_pipe(pipe_slow); 2382 %} 2383 2384 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 2385 predicate(UseAVX > 0); 2386 match(Set dst (AddD src1 src2)); 2387 2388 format %{ "vaddsd $dst, $src1, $src2" %} 2389 ins_cost(150); 2390 ins_encode %{ 2391 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2392 %} 2393 ins_pipe(pipe_slow); 2394 %} 2395 2396 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 2397 predicate(UseAVX > 0); 2398 match(Set dst (AddD src1 (LoadD src2))); 2399 2400 format %{ "vaddsd $dst, $src1, $src2" %} 2401 ins_cost(150); 2402 ins_encode %{ 2403 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2404 %} 2405 ins_pipe(pipe_slow); 2406 %} 2407 2408 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 2409 predicate(UseAVX > 0); 2410 match(Set dst (AddD src con)); 2411 2412 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2413 ins_cost(150); 2414 ins_encode %{ 2415 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2416 %} 2417 ins_pipe(pipe_slow); 2418 %} 2419 2420 instruct subF_reg(regF dst, regF src) %{ 2421 predicate((UseSSE>=1) && (UseAVX == 0)); 2422 match(Set dst (SubF dst src)); 2423 2424 format %{ "subss $dst, $src" %} 2425 ins_cost(150); 2426 ins_encode %{ 2427 __ subss($dst$$XMMRegister, $src$$XMMRegister); 2428 %} 2429 ins_pipe(pipe_slow); 2430 %} 2431 2432 instruct subF_mem(regF dst, memory src) %{ 2433 predicate((UseSSE>=1) && (UseAVX == 0)); 2434 match(Set dst (SubF dst (LoadF src))); 2435 2436 format %{ "subss $dst, $src" %} 2437 ins_cost(150); 2438 ins_encode %{ 2439 __ subss($dst$$XMMRegister, $src$$Address); 2440 %} 2441 ins_pipe(pipe_slow); 2442 %} 2443 2444 instruct subF_imm(regF dst, immF con) %{ 2445 predicate((UseSSE>=1) && (UseAVX == 0)); 2446 match(Set dst (SubF dst con)); 2447 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2448 ins_cost(150); 2449 ins_encode %{ 2450 __ subss($dst$$XMMRegister, $constantaddress($con)); 2451 %} 2452 ins_pipe(pipe_slow); 2453 %} 2454 2455 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 2456 predicate(UseAVX > 0); 2457 match(Set dst (SubF src1 src2)); 2458 2459 format %{ "vsubss $dst, $src1, $src2" %} 2460 ins_cost(150); 2461 ins_encode %{ 2462 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2463 %} 2464 ins_pipe(pipe_slow); 2465 %} 2466 2467 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 2468 predicate(UseAVX > 0); 2469 match(Set dst (SubF src1 (LoadF src2))); 2470 2471 format %{ "vsubss $dst, $src1, $src2" %} 2472 ins_cost(150); 2473 ins_encode %{ 2474 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2475 %} 2476 ins_pipe(pipe_slow); 2477 %} 2478 2479 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 2480 predicate(UseAVX > 0); 2481 match(Set dst (SubF src con)); 2482 2483 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2484 ins_cost(150); 2485 ins_encode %{ 
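// $constantaddress($con) materializes the float immediate in the method's
// constant table and returns its address, so vsubss reads the right-hand
// operand straight from memory -- matching the "load from constant table"
// note in the format string above.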
2486 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2487 %} 2488 ins_pipe(pipe_slow); 2489 %} 2490 2491 instruct subD_reg(regD dst, regD src) %{ 2492 predicate((UseSSE>=2) && (UseAVX == 0)); 2493 match(Set dst (SubD dst src)); 2494 2495 format %{ "subsd $dst, $src" %} 2496 ins_cost(150); 2497 ins_encode %{ 2498 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 2499 %} 2500 ins_pipe(pipe_slow); 2501 %} 2502 2503 instruct subD_mem(regD dst, memory src) %{ 2504 predicate((UseSSE>=2) && (UseAVX == 0)); 2505 match(Set dst (SubD dst (LoadD src))); 2506 2507 format %{ "subsd $dst, $src" %} 2508 ins_cost(150); 2509 ins_encode %{ 2510 __ subsd($dst$$XMMRegister, $src$$Address); 2511 %} 2512 ins_pipe(pipe_slow); 2513 %} 2514 2515 instruct subD_imm(regD dst, immD con) %{ 2516 predicate((UseSSE>=2) && (UseAVX == 0)); 2517 match(Set dst (SubD dst con)); 2518 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2519 ins_cost(150); 2520 ins_encode %{ 2521 __ subsd($dst$$XMMRegister, $constantaddress($con)); 2522 %} 2523 ins_pipe(pipe_slow); 2524 %} 2525 2526 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 2527 predicate(UseAVX > 0); 2528 match(Set dst (SubD src1 src2)); 2529 2530 format %{ "vsubsd $dst, $src1, $src2" %} 2531 ins_cost(150); 2532 ins_encode %{ 2533 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2534 %} 2535 ins_pipe(pipe_slow); 2536 %} 2537 2538 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 2539 predicate(UseAVX > 0); 2540 match(Set dst (SubD src1 (LoadD src2))); 2541 2542 format %{ "vsubsd $dst, $src1, $src2" %} 2543 ins_cost(150); 2544 ins_encode %{ 2545 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2546 %} 2547 ins_pipe(pipe_slow); 2548 %} 2549 2550 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 2551 predicate(UseAVX > 0); 2552 match(Set dst (SubD src con)); 2553 2554 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2555 ins_cost(150); 2556 ins_encode %{ 2557 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2558 %} 2559 ins_pipe(pipe_slow); 2560 %} 2561 2562 instruct mulF_reg(regF dst, regF src) %{ 2563 predicate((UseSSE>=1) && (UseAVX == 0)); 2564 match(Set dst (MulF dst src)); 2565 2566 format %{ "mulss $dst, $src" %} 2567 ins_cost(150); 2568 ins_encode %{ 2569 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 2570 %} 2571 ins_pipe(pipe_slow); 2572 %} 2573 2574 instruct mulF_mem(regF dst, memory src) %{ 2575 predicate((UseSSE>=1) && (UseAVX == 0)); 2576 match(Set dst (MulF dst (LoadF src))); 2577 2578 format %{ "mulss $dst, $src" %} 2579 ins_cost(150); 2580 ins_encode %{ 2581 __ mulss($dst$$XMMRegister, $src$$Address); 2582 %} 2583 ins_pipe(pipe_slow); 2584 %} 2585 2586 instruct mulF_imm(regF dst, immF con) %{ 2587 predicate((UseSSE>=1) && (UseAVX == 0)); 2588 match(Set dst (MulF dst con)); 2589 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2590 ins_cost(150); 2591 ins_encode %{ 2592 __ mulss($dst$$XMMRegister, $constantaddress($con)); 2593 %} 2594 ins_pipe(pipe_slow); 2595 %} 2596 2597 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 2598 predicate(UseAVX > 0); 2599 match(Set dst (MulF src1 src2)); 2600 2601 format %{ "vmulss $dst, $src1, $src2" %} 2602 ins_cost(150); 2603 ins_encode %{ 2604 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2605 %} 2606 ins_pipe(pipe_slow); 2607 %} 2608 2609 instruct mulF_reg_mem(regF dst, 
regF src1, memory src2) %{ 2610 predicate(UseAVX > 0); 2611 match(Set dst (MulF src1 (LoadF src2))); 2612 2613 format %{ "vmulss $dst, $src1, $src2" %} 2614 ins_cost(150); 2615 ins_encode %{ 2616 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2617 %} 2618 ins_pipe(pipe_slow); 2619 %} 2620 2621 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 2622 predicate(UseAVX > 0); 2623 match(Set dst (MulF src con)); 2624 2625 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2626 ins_cost(150); 2627 ins_encode %{ 2628 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2629 %} 2630 ins_pipe(pipe_slow); 2631 %} 2632 2633 instruct mulD_reg(regD dst, regD src) %{ 2634 predicate((UseSSE>=2) && (UseAVX == 0)); 2635 match(Set dst (MulD dst src)); 2636 2637 format %{ "mulsd $dst, $src" %} 2638 ins_cost(150); 2639 ins_encode %{ 2640 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 2641 %} 2642 ins_pipe(pipe_slow); 2643 %} 2644 2645 instruct mulD_mem(regD dst, memory src) %{ 2646 predicate((UseSSE>=2) && (UseAVX == 0)); 2647 match(Set dst (MulD dst (LoadD src))); 2648 2649 format %{ "mulsd $dst, $src" %} 2650 ins_cost(150); 2651 ins_encode %{ 2652 __ mulsd($dst$$XMMRegister, $src$$Address); 2653 %} 2654 ins_pipe(pipe_slow); 2655 %} 2656 2657 instruct mulD_imm(regD dst, immD con) %{ 2658 predicate((UseSSE>=2) && (UseAVX == 0)); 2659 match(Set dst (MulD dst con)); 2660 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2661 ins_cost(150); 2662 ins_encode %{ 2663 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 2664 %} 2665 ins_pipe(pipe_slow); 2666 %} 2667 2668 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 2669 predicate(UseAVX > 0); 2670 match(Set dst (MulD src1 src2)); 2671 2672 format %{ "vmulsd $dst, $src1, $src2" %} 2673 ins_cost(150); 2674 ins_encode %{ 2675 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2676 %} 2677 ins_pipe(pipe_slow); 2678 %} 2679 2680 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 2681 predicate(UseAVX > 0); 2682 match(Set dst (MulD src1 (LoadD src2))); 2683 2684 format %{ "vmulsd $dst, $src1, $src2" %} 2685 ins_cost(150); 2686 ins_encode %{ 2687 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2688 %} 2689 ins_pipe(pipe_slow); 2690 %} 2691 2692 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 2693 predicate(UseAVX > 0); 2694 match(Set dst (MulD src con)); 2695 2696 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2697 ins_cost(150); 2698 ins_encode %{ 2699 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2700 %} 2701 ins_pipe(pipe_slow); 2702 %} 2703 2704 instruct divF_reg(regF dst, regF src) %{ 2705 predicate((UseSSE>=1) && (UseAVX == 0)); 2706 match(Set dst (DivF dst src)); 2707 2708 format %{ "divss $dst, $src" %} 2709 ins_cost(150); 2710 ins_encode %{ 2711 __ divss($dst$$XMMRegister, $src$$XMMRegister); 2712 %} 2713 ins_pipe(pipe_slow); 2714 %} 2715 2716 instruct divF_mem(regF dst, memory src) %{ 2717 predicate((UseSSE>=1) && (UseAVX == 0)); 2718 match(Set dst (DivF dst (LoadF src))); 2719 2720 format %{ "divss $dst, $src" %} 2721 ins_cost(150); 2722 ins_encode %{ 2723 __ divss($dst$$XMMRegister, $src$$Address); 2724 %} 2725 ins_pipe(pipe_slow); 2726 %} 2727 2728 instruct divF_imm(regF dst, immF con) %{ 2729 predicate((UseSSE>=1) && (UseAVX == 0)); 2730 match(Set dst (DivF dst con)); 2731 format %{ "divss $dst, 
[$constantaddress]\t# load from constant table: float=$con" %} 2732 ins_cost(150); 2733 ins_encode %{ 2734 __ divss($dst$$XMMRegister, $constantaddress($con)); 2735 %} 2736 ins_pipe(pipe_slow); 2737 %} 2738 2739 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 2740 predicate(UseAVX > 0); 2741 match(Set dst (DivF src1 src2)); 2742 2743 format %{ "vdivss $dst, $src1, $src2" %} 2744 ins_cost(150); 2745 ins_encode %{ 2746 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2747 %} 2748 ins_pipe(pipe_slow); 2749 %} 2750 2751 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 2752 predicate(UseAVX > 0); 2753 match(Set dst (DivF src1 (LoadF src2))); 2754 2755 format %{ "vdivss $dst, $src1, $src2" %} 2756 ins_cost(150); 2757 ins_encode %{ 2758 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2759 %} 2760 ins_pipe(pipe_slow); 2761 %} 2762 2763 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 2764 predicate(UseAVX > 0); 2765 match(Set dst (DivF src con)); 2766 2767 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2768 ins_cost(150); 2769 ins_encode %{ 2770 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2771 %} 2772 ins_pipe(pipe_slow); 2773 %} 2774 2775 instruct divD_reg(regD dst, regD src) %{ 2776 predicate((UseSSE>=2) && (UseAVX == 0)); 2777 match(Set dst (DivD dst src)); 2778 2779 format %{ "divsd $dst, $src" %} 2780 ins_cost(150); 2781 ins_encode %{ 2782 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 2783 %} 2784 ins_pipe(pipe_slow); 2785 %} 2786 2787 instruct divD_mem(regD dst, memory src) %{ 2788 predicate((UseSSE>=2) && (UseAVX == 0)); 2789 match(Set dst (DivD dst (LoadD src))); 2790 2791 format %{ "divsd $dst, $src" %} 2792 ins_cost(150); 2793 ins_encode %{ 2794 __ divsd($dst$$XMMRegister, $src$$Address); 2795 %} 2796 ins_pipe(pipe_slow); 2797 %} 2798 2799 instruct divD_imm(regD dst, immD con) %{ 2800 predicate((UseSSE>=2) && (UseAVX == 0)); 2801 match(Set dst (DivD dst con)); 2802 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2803 ins_cost(150); 2804 ins_encode %{ 2805 __ divsd($dst$$XMMRegister, $constantaddress($con)); 2806 %} 2807 ins_pipe(pipe_slow); 2808 %} 2809 2810 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 2811 predicate(UseAVX > 0); 2812 match(Set dst (DivD src1 src2)); 2813 2814 format %{ "vdivsd $dst, $src1, $src2" %} 2815 ins_cost(150); 2816 ins_encode %{ 2817 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2818 %} 2819 ins_pipe(pipe_slow); 2820 %} 2821 2822 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 2823 predicate(UseAVX > 0); 2824 match(Set dst (DivD src1 (LoadD src2))); 2825 2826 format %{ "vdivsd $dst, $src1, $src2" %} 2827 ins_cost(150); 2828 ins_encode %{ 2829 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2830 %} 2831 ins_pipe(pipe_slow); 2832 %} 2833 2834 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 2835 predicate(UseAVX > 0); 2836 match(Set dst (DivD src con)); 2837 2838 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2839 ins_cost(150); 2840 ins_encode %{ 2841 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2842 %} 2843 ins_pipe(pipe_slow); 2844 %} 2845 2846 instruct absF_reg(regF dst) %{ 2847 predicate((UseSSE>=1) && (UseAVX == 0)); 2848 match(Set dst (AbsF dst)); 2849 ins_cost(150); 2850 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" 
%} 2851 ins_encode %{
2852 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
2853 %}
2854 ins_pipe(pipe_slow);
2855 %}
2856
2857 instruct absF_reg_reg(regF dst, regF src) %{
2858 predicate(VM_Version::supports_avxonly());
2859 match(Set dst (AbsF src));
2860 ins_cost(150);
2861 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
2862 ins_encode %{
2863 int vector_len = 0;
2864 __ vandps($dst$$XMMRegister, $src$$XMMRegister,
2865 ExternalAddress(float_signmask()), vector_len);
2866 %}
2867 ins_pipe(pipe_slow);
2868 %}
2869
2870 #ifdef _LP64
2871 instruct absF_reg_reg_evex(regF dst, regF src) %{
2872 predicate(UseAVX > 2 && VM_Version::supports_avx512vl());
2873 match(Set dst (AbsF src));
2874 ins_cost(150);
2875 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
2876 ins_encode %{
2877 int vector_len = 0;
2878 __ vandps($dst$$XMMRegister, $src$$XMMRegister,
2879 ExternalAddress(float_signmask()), vector_len);
2880 %}
2881 ins_pipe(pipe_slow);
2882 %}
2883
2884 instruct absF_reg_reg_evex_special(regF dst, regF src1, regF src2) %{
2885 predicate(VM_Version::supports_avx512novl());
2886 match(Set dst (AbsF src1));
2887 effect(TEMP src2);
2888 ins_cost(150);
2889 format %{ "vabsss $dst, $src1, $src2, [0x7fffffff]\t# abs float by sign masking" %}
2890 ins_encode %{
2891 int vector_len = 0;
2892 __ vabsss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
2893 ExternalAddress(float_signmask()), vector_len);
2894 %}
2895 ins_pipe(pipe_slow);
2896 %}
2897 #else // _LP64
2898 instruct absF_reg_reg_evex(regF dst, regF src) %{
2899 predicate(UseAVX > 2);
2900 match(Set dst (AbsF src));
2901 ins_cost(150);
2902 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
2903 ins_encode %{
2904 int vector_len = 0;
2905 __ vandps($dst$$XMMRegister, $src$$XMMRegister,
2906 ExternalAddress(float_signmask()), vector_len);
2907 %}
2908 ins_pipe(pipe_slow);
2909 %}
2910 #endif
2911
2912 instruct absD_reg(regD dst) %{
2913 predicate((UseSSE>=2) && (UseAVX == 0));
2914 match(Set dst (AbsD dst));
2915 ins_cost(150);
2916 format %{ "andpd $dst, [0x7fffffffffffffff]\t"
2917 "# abs double by sign masking" %}
2918 ins_encode %{
2919 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
2920 %}
2921 ins_pipe(pipe_slow);
2922 %}
2923
2924 instruct absD_reg_reg(regD dst, regD src) %{
2925 predicate(VM_Version::supports_avxonly());
2926 match(Set dst (AbsD src));
2927 ins_cost(150);
2928 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
2929 "# abs double by sign masking" %}
2930 ins_encode %{
2931 int vector_len = 0;
2932 __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
2933 ExternalAddress(double_signmask()), vector_len);
2934 %}
2935 ins_pipe(pipe_slow);
2936 %}
2937
2938 #ifdef _LP64
2939 instruct absD_reg_reg_evex(regD dst, regD src) %{
2940 predicate(UseAVX > 2 && VM_Version::supports_avx512vl());
2941 match(Set dst (AbsD src));
2942 ins_cost(150);
2943 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
2944 "# abs double by sign masking" %}
2945 ins_encode %{
2946 int vector_len = 0;
2947 __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
2948 ExternalAddress(double_signmask()), vector_len);
2949 %}
2950 ins_pipe(pipe_slow);
2951 %}
2952
2953 instruct absD_reg_reg_evex_special(regD dst, regD src1, regD src2) %{
2954 predicate(VM_Version::supports_avx512novl());
2955 match(Set dst (AbsD src1));
2956 effect(TEMP src2);
2957 ins_cost(150);
2958 format %{ "vabssd $dst, $src1, $src2, [0x7fffffffffffffff]\t# abs double by sign masking" %}
2959 ins_encode %{
2960 int vector_len = 0;
2961 __ vabssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
2962 ExternalAddress(double_signmask()), vector_len);
2963 %}
2964 ins_pipe(pipe_slow);
2965 %}
2966 #else // _LP64
2967 instruct absD_reg_reg_evex(regD dst, regD src) %{
2968 predicate(UseAVX > 2);
2969 match(Set dst (AbsD src));
2970 ins_cost(150);
2971 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
2972 "# abs double by sign masking" %}
2973 ins_encode %{
2974 int vector_len = 0;
2975 __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
2976 ExternalAddress(double_signmask()), vector_len);
2977 %}
2978 ins_pipe(pipe_slow);
2979 %}
2980 #endif
2981
2982 instruct negF_reg(regF dst) %{
2983 predicate((UseSSE>=1) && (UseAVX == 0));
2984 match(Set dst (NegF dst));
2985 ins_cost(150);
2986 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
2987 ins_encode %{
2988 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
2989 %}
2990 ins_pipe(pipe_slow);
2991 %}
2992
2993 instruct negF_reg_reg(regF dst, regF src) %{
2994 predicate(UseAVX > 0);
2995 match(Set dst (NegF src));
2996 ins_cost(150);
2997 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
2998 ins_encode %{
2999 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
3000 ExternalAddress(float_signflip()));
3001 %}
3002 ins_pipe(pipe_slow);
3003 %}
3004
3005 instruct negD_reg(regD dst) %{
3006 predicate((UseSSE>=2) && (UseAVX == 0));
3007 match(Set dst (NegD dst));
3008 ins_cost(150);
3009 format %{ "xorpd $dst, [0x8000000000000000]\t"
3010 "# neg double by sign flipping" %}
3011 ins_encode %{
3012 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
3013 %}
3014 ins_pipe(pipe_slow);
3015 %}
3016
3017 instruct negD_reg_reg(regD dst, regD src) %{
3018 predicate(UseAVX > 0);
3019 match(Set dst (NegD src));
3020 ins_cost(150);
3021 format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
3022 "# neg double by sign flipping" %}
3023 ins_encode %{
3024 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
3025 ExternalAddress(double_signflip()));
3026 %}
3027 ins_pipe(pipe_slow);
3028 %}
3029
3030 instruct sqrtF_reg(regF dst, regF src) %{
3031 predicate(UseSSE>=1);
3032 match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
3033
3034 format %{ "sqrtss $dst, $src" %}
3035 ins_cost(150);
3036 ins_encode %{
3037 __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
3038 %}
3039 ins_pipe(pipe_slow);
3040 %}
3041
3042 instruct sqrtF_mem(regF dst, memory src) %{
3043 predicate(UseSSE>=1);
3044 match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));
3045
3046 format %{ "sqrtss $dst, $src" %}
3047 ins_cost(150);
3048 ins_encode %{
3049 __ sqrtss($dst$$XMMRegister, $src$$Address);
3050 %}
3051 ins_pipe(pipe_slow);
3052 %}
3053
3054 instruct sqrtF_imm(regF dst, immF con) %{
3055 predicate(UseSSE>=1);
3056 match(Set dst (ConvD2F (SqrtD (ConvF2D con))));
3057 format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
3058 ins_cost(150);
3059 ins_encode %{
3060 __ sqrtss($dst$$XMMRegister, $constantaddress($con));
3061 %}
3062 ins_pipe(pipe_slow);
3063 %}
3064
3065 instruct sqrtD_reg(regD dst, regD src) %{
3066 predicate(UseSSE>=2);
3067 match(Set dst (SqrtD src));
3068
3069 format %{ "sqrtsd $dst, $src" %}
3070 ins_cost(150);
3071 ins_encode %{
3072 __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
3073 %}
3074 ins_pipe(pipe_slow);
3075 %}
3076
3077 instruct sqrtD_mem(regD dst, memory src) %{
3078 predicate(UseSSE>=2);
3079 match(Set dst
(SqrtD (LoadD src))); 3080 3081 format %{ "sqrtsd $dst, $src" %} 3082 ins_cost(150); 3083 ins_encode %{ 3084 __ sqrtsd($dst$$XMMRegister, $src$$Address); 3085 %} 3086 ins_pipe(pipe_slow); 3087 %} 3088 3089 instruct sqrtD_imm(regD dst, immD con) %{ 3090 predicate(UseSSE>=2); 3091 match(Set dst (SqrtD con)); 3092 format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3093 ins_cost(150); 3094 ins_encode %{ 3095 __ sqrtsd($dst$$XMMRegister, $constantaddress($con)); 3096 %} 3097 ins_pipe(pipe_slow); 3098 %} 3099 3100 instruct onspinwait() %{ 3101 match(OnSpinWait); 3102 ins_cost(200); 3103 3104 format %{ 3105 $$template 3106 if (os::is_MP()) { 3107 $$emit$$"pause\t! membar_onspinwait" 3108 } else { 3109 $$emit$$"MEMBAR-onspinwait ! (empty encoding)" 3110 } 3111 %} 3112 ins_encode %{ 3113 __ pause(); 3114 %} 3115 ins_pipe(pipe_slow); 3116 %} 3117 3118 // ====================VECTOR INSTRUCTIONS===================================== 3119 3120 // Load vectors (4 bytes long) 3121 instruct loadV4(vecS dst, memory mem) %{ 3122 predicate(n->as_LoadVector()->memory_size() == 4); 3123 match(Set dst (LoadVector mem)); 3124 ins_cost(125); 3125 format %{ "movd $dst,$mem\t! load vector (4 bytes)" %} 3126 ins_encode %{ 3127 __ movdl($dst$$XMMRegister, $mem$$Address); 3128 %} 3129 ins_pipe( pipe_slow ); 3130 %} 3131 3132 // Load vectors (8 bytes long) 3133 instruct loadV8(vecD dst, memory mem) %{ 3134 predicate(n->as_LoadVector()->memory_size() == 8); 3135 match(Set dst (LoadVector mem)); 3136 ins_cost(125); 3137 format %{ "movq $dst,$mem\t! load vector (8 bytes)" %} 3138 ins_encode %{ 3139 __ movq($dst$$XMMRegister, $mem$$Address); 3140 %} 3141 ins_pipe( pipe_slow ); 3142 %} 3143 3144 // Load vectors (16 bytes long) 3145 instruct loadV16(vecX dst, memory mem) %{ 3146 predicate(n->as_LoadVector()->memory_size() == 16); 3147 match(Set dst (LoadVector mem)); 3148 ins_cost(125); 3149 format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %} 3150 ins_encode %{ 3151 __ movdqu($dst$$XMMRegister, $mem$$Address); 3152 %} 3153 ins_pipe( pipe_slow ); 3154 %} 3155 3156 // Load vectors (32 bytes long) 3157 instruct loadV32(vecY dst, memory mem) %{ 3158 predicate(n->as_LoadVector()->memory_size() == 32); 3159 match(Set dst (LoadVector mem)); 3160 ins_cost(125); 3161 format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %} 3162 ins_encode %{ 3163 __ vmovdqu($dst$$XMMRegister, $mem$$Address); 3164 %} 3165 ins_pipe( pipe_slow ); 3166 %} 3167 3168 // Load vectors (64 bytes long) 3169 instruct loadV64_dword(vecZ dst, memory mem) %{ 3170 predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() <= 4); 3171 match(Set dst (LoadVector mem)); 3172 ins_cost(125); 3173 format %{ "vmovdqul $dst k0,$mem\t! load vector (64 bytes)" %} 3174 ins_encode %{ 3175 int vector_len = 2; 3176 __ evmovdqul($dst$$XMMRegister, $mem$$Address, vector_len); 3177 %} 3178 ins_pipe( pipe_slow ); 3179 %} 3180 3181 // Load vectors (64 bytes long) 3182 instruct loadV64_qword(vecZ dst, memory mem) %{ 3183 predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() > 4); 3184 match(Set dst (LoadVector mem)); 3185 ins_cost(125); 3186 format %{ "vmovdquq $dst k0,$mem\t! 
load vector (64 bytes)" %} 3187 ins_encode %{ 3188 int vector_len = 2; 3189 __ evmovdquq($dst$$XMMRegister, $mem$$Address, vector_len); 3190 %} 3191 ins_pipe( pipe_slow ); 3192 %} 3193 3194 // Store vectors 3195 instruct storeV4(memory mem, vecS src) %{ 3196 predicate(n->as_StoreVector()->memory_size() == 4); 3197 match(Set mem (StoreVector mem src)); 3198 ins_cost(145); 3199 format %{ "movd $mem,$src\t! store vector (4 bytes)" %} 3200 ins_encode %{ 3201 __ movdl($mem$$Address, $src$$XMMRegister); 3202 %} 3203 ins_pipe( pipe_slow ); 3204 %} 3205 3206 instruct storeV8(memory mem, vecD src) %{ 3207 predicate(n->as_StoreVector()->memory_size() == 8); 3208 match(Set mem (StoreVector mem src)); 3209 ins_cost(145); 3210 format %{ "movq $mem,$src\t! store vector (8 bytes)" %} 3211 ins_encode %{ 3212 __ movq($mem$$Address, $src$$XMMRegister); 3213 %} 3214 ins_pipe( pipe_slow ); 3215 %} 3216 3217 instruct storeV16(memory mem, vecX src) %{ 3218 predicate(n->as_StoreVector()->memory_size() == 16); 3219 match(Set mem (StoreVector mem src)); 3220 ins_cost(145); 3221 format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %} 3222 ins_encode %{ 3223 __ movdqu($mem$$Address, $src$$XMMRegister); 3224 %} 3225 ins_pipe( pipe_slow ); 3226 %} 3227 3228 instruct storeV32(memory mem, vecY src) %{ 3229 predicate(n->as_StoreVector()->memory_size() == 32); 3230 match(Set mem (StoreVector mem src)); 3231 ins_cost(145); 3232 format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %} 3233 ins_encode %{ 3234 __ vmovdqu($mem$$Address, $src$$XMMRegister); 3235 %} 3236 ins_pipe( pipe_slow ); 3237 %} 3238 3239 instruct storeV64_dword(memory mem, vecZ src) %{ 3240 predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() <= 4); 3241 match(Set mem (StoreVector mem src)); 3242 ins_cost(145); 3243 format %{ "vmovdqul $mem k0,$src\t! store vector (64 bytes)" %} 3244 ins_encode %{ 3245 int vector_len = 2; 3246 __ evmovdqul($mem$$Address, $src$$XMMRegister, vector_len); 3247 %} 3248 ins_pipe( pipe_slow ); 3249 %} 3250 3251 instruct storeV64_qword(memory mem, vecZ src) %{ 3252 predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() > 4); 3253 match(Set mem (StoreVector mem src)); 3254 ins_cost(145); 3255 format %{ "vmovdquq $mem k0,$src\t! store vector (64 bytes)" %} 3256 ins_encode %{ 3257 int vector_len = 2; 3258 __ evmovdquq($mem$$Address, $src$$XMMRegister, vector_len); 3259 %} 3260 ins_pipe( pipe_slow ); 3261 %} 3262 3263 // ====================LEGACY REPLICATE======================================= 3264 3265 instruct Repl4B_mem(vecS dst, memory mem) %{ 3266 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3267 match(Set dst (ReplicateB (LoadB mem))); 3268 format %{ "punpcklbw $dst,$mem\n\t" 3269 "pshuflw $dst,$dst,0x00\t! replicate4B" %} 3270 ins_encode %{ 3271 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3272 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3273 %} 3274 ins_pipe( pipe_slow ); 3275 %} 3276 3277 instruct Repl8B_mem(vecD dst, memory mem) %{ 3278 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3279 match(Set dst (ReplicateB (LoadB mem))); 3280 format %{ "punpcklbw $dst,$mem\n\t" 3281 "pshuflw $dst,$dst,0x00\t! 
replicate8B" %} 3282 ins_encode %{ 3283 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3284 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3285 %} 3286 ins_pipe( pipe_slow ); 3287 %} 3288 3289 instruct Repl16B(vecX dst, rRegI src) %{ 3290 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3291 match(Set dst (ReplicateB src)); 3292 format %{ "movd $dst,$src\n\t" 3293 "punpcklbw $dst,$dst\n\t" 3294 "pshuflw $dst,$dst,0x00\n\t" 3295 "punpcklqdq $dst,$dst\t! replicate16B" %} 3296 ins_encode %{ 3297 __ movdl($dst$$XMMRegister, $src$$Register); 3298 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3299 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3300 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3301 %} 3302 ins_pipe( pipe_slow ); 3303 %} 3304 3305 instruct Repl16B_mem(vecX dst, memory mem) %{ 3306 predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3307 match(Set dst (ReplicateB (LoadB mem))); 3308 format %{ "punpcklbw $dst,$mem\n\t" 3309 "pshuflw $dst,$dst,0x00\n\t" 3310 "punpcklqdq $dst,$dst\t! replicate16B" %} 3311 ins_encode %{ 3312 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3313 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3314 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3315 %} 3316 ins_pipe( pipe_slow ); 3317 %} 3318 3319 instruct Repl32B(vecY dst, rRegI src) %{ 3320 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3321 match(Set dst (ReplicateB src)); 3322 format %{ "movd $dst,$src\n\t" 3323 "punpcklbw $dst,$dst\n\t" 3324 "pshuflw $dst,$dst,0x00\n\t" 3325 "punpcklqdq $dst,$dst\n\t" 3326 "vinserti128_high $dst,$dst\t! replicate32B" %} 3327 ins_encode %{ 3328 __ movdl($dst$$XMMRegister, $src$$Register); 3329 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3330 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3331 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3332 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3333 %} 3334 ins_pipe( pipe_slow ); 3335 %} 3336 3337 instruct Repl32B_mem(vecY dst, memory mem) %{ 3338 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3339 match(Set dst (ReplicateB (LoadB mem))); 3340 format %{ "punpcklbw $dst,$mem\n\t" 3341 "pshuflw $dst,$dst,0x00\n\t" 3342 "punpcklqdq $dst,$dst\n\t" 3343 "vinserti128_high $dst,$dst\t! replicate32B" %} 3344 ins_encode %{ 3345 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3346 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3347 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3348 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3349 %} 3350 ins_pipe( pipe_slow ); 3351 %} 3352 3353 instruct Repl16B_imm(vecX dst, immI con) %{ 3354 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3355 match(Set dst (ReplicateB con)); 3356 format %{ "movq $dst,[$constantaddress]\n\t" 3357 "punpcklqdq $dst,$dst\t! replicate16B($con)" %} 3358 ins_encode %{ 3359 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3360 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3361 %} 3362 ins_pipe( pipe_slow ); 3363 %} 3364 3365 instruct Repl32B_imm(vecY dst, immI con) %{ 3366 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3367 match(Set dst (ReplicateB con)); 3368 format %{ "movq $dst,[$constantaddress]\n\t" 3369 "punpcklqdq $dst,$dst\n\t" 3370 "vinserti128_high $dst,$dst\t! 
replicate32B($con)" %}
3371 ins_encode %{
3372 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
3373 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3374 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
3375 %}
3376 ins_pipe( pipe_slow );
3377 %}
3378
3379 instruct Repl4S(vecD dst, rRegI src) %{
3380 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vlbw());
3381 match(Set dst (ReplicateS src));
3382 format %{ "movd $dst,$src\n\t"
3383 "pshuflw $dst,$dst,0x00\t! replicate4S" %}
3384 ins_encode %{
3385 __ movdl($dst$$XMMRegister, $src$$Register);
3386 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3387 %}
3388 ins_pipe( pipe_slow );
3389 %}
3390
3391 instruct Repl4S_mem(vecD dst, memory mem) %{
3392 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
3393 match(Set dst (ReplicateS (LoadS mem)));
3394 format %{ "pshuflw $dst,$mem,0x00\t! replicate4S" %}
3395 ins_encode %{
3396 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
3397 %}
3398 ins_pipe( pipe_slow );
3399 %}
3400
3401 instruct Repl8S(vecX dst, rRegI src) %{
3402 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw());
3403 match(Set dst (ReplicateS src));
3404 format %{ "movd $dst,$src\n\t"
3405 "pshuflw $dst,$dst,0x00\n\t"
3406 "punpcklqdq $dst,$dst\t! replicate8S" %}
3407 ins_encode %{
3408 __ movdl($dst$$XMMRegister, $src$$Register);
3409 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3410 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3411 %}
3412 ins_pipe( pipe_slow );
3413 %}
3414
3415 instruct Repl8S_mem(vecX dst, memory mem) %{
3416 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
3417 match(Set dst (ReplicateS (LoadS mem)));
3418 format %{ "pshuflw $dst,$mem,0x00\n\t"
3419 "punpcklqdq $dst,$dst\t! replicate8S" %}
3420 ins_encode %{
3421 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
3422 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3423 %}
3424 ins_pipe( pipe_slow );
3425 %}
3426
3427 instruct Repl8S_imm(vecX dst, immI con) %{
3428 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw());
3429 match(Set dst (ReplicateS con));
3430 format %{ "movq $dst,[$constantaddress]\n\t"
3431 "punpcklqdq $dst,$dst\t! replicate8S($con)" %}
3432 ins_encode %{
3433 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
3434 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3435 %}
3436 ins_pipe( pipe_slow );
3437 %}
3438
3439 instruct Repl16S(vecY dst, rRegI src) %{
3440 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
3441 match(Set dst (ReplicateS src));
3442 format %{ "movd $dst,$src\n\t"
3443 "pshuflw $dst,$dst,0x00\n\t"
3444 "punpcklqdq $dst,$dst\n\t"
3445 "vinserti128_high $dst,$dst\t! replicate16S" %}
3446 ins_encode %{
3447 __ movdl($dst$$XMMRegister, $src$$Register);
3448 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3449 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3450 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
3451 %}
3452 ins_pipe( pipe_slow );
3453 %}
3454
3455 instruct Repl16S_mem(vecY dst, memory mem) %{
3456 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
3457 match(Set dst (ReplicateS (LoadS mem)));
3458 format %{ "pshuflw $dst,$mem,0x00\n\t"
3459 "punpcklqdq $dst,$dst\n\t"
3460 "vinserti128_high $dst,$dst\t!
replicate16S" %} 3461 ins_encode %{ 3462 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3463 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3464 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3465 %} 3466 ins_pipe( pipe_slow ); 3467 %} 3468 3469 instruct Repl16S_imm(vecY dst, immI con) %{ 3470 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3471 match(Set dst (ReplicateS con)); 3472 format %{ "movq $dst,[$constantaddress]\n\t" 3473 "punpcklqdq $dst,$dst\n\t" 3474 "vinserti128_high $dst,$dst\t! replicate16S($con)" %} 3475 ins_encode %{ 3476 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3477 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3478 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3479 %} 3480 ins_pipe( pipe_slow ); 3481 %} 3482 3483 instruct Repl4I(vecX dst, rRegI src) %{ 3484 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3485 match(Set dst (ReplicateI src)); 3486 format %{ "movd $dst,$src\n\t" 3487 "pshufd $dst,$dst,0x00\t! replicate4I" %} 3488 ins_encode %{ 3489 __ movdl($dst$$XMMRegister, $src$$Register); 3490 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3491 %} 3492 ins_pipe( pipe_slow ); 3493 %} 3494 3495 instruct Repl4I_mem(vecX dst, memory mem) %{ 3496 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3497 match(Set dst (ReplicateI (LoadI mem))); 3498 format %{ "pshufd $dst,$mem,0x00\t! replicate4I" %} 3499 ins_encode %{ 3500 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3501 %} 3502 ins_pipe( pipe_slow ); 3503 %} 3504 3505 instruct Repl8I(vecY dst, rRegI src) %{ 3506 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3507 match(Set dst (ReplicateI src)); 3508 format %{ "movd $dst,$src\n\t" 3509 "pshufd $dst,$dst,0x00\n\t" 3510 "vinserti128_high $dst,$dst\t! replicate8I" %} 3511 ins_encode %{ 3512 __ movdl($dst$$XMMRegister, $src$$Register); 3513 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3514 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3515 %} 3516 ins_pipe( pipe_slow ); 3517 %} 3518 3519 instruct Repl8I_mem(vecY dst, memory mem) %{ 3520 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3521 match(Set dst (ReplicateI (LoadI mem))); 3522 format %{ "pshufd $dst,$mem,0x00\n\t" 3523 "vinserti128_high $dst,$dst\t! replicate8I" %} 3524 ins_encode %{ 3525 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3526 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3527 %} 3528 ins_pipe( pipe_slow ); 3529 %} 3530 3531 instruct Repl4I_imm(vecX dst, immI con) %{ 3532 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3533 match(Set dst (ReplicateI con)); 3534 format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t" 3535 "punpcklqdq $dst,$dst" %} 3536 ins_encode %{ 3537 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3538 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3539 %} 3540 ins_pipe( pipe_slow ); 3541 %} 3542 3543 instruct Repl8I_imm(vecY dst, immI con) %{ 3544 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3545 match(Set dst (ReplicateI con)); 3546 format %{ "movq $dst,[$constantaddress]\t! 
replicate8I($con)\n\t" 3547 "punpcklqdq $dst,$dst\n\t" 3548 "vinserti128_high $dst,$dst" %} 3549 ins_encode %{ 3550 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3551 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3552 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3553 %} 3554 ins_pipe( pipe_slow ); 3555 %} 3556 3557 // Long could be loaded into xmm register directly from memory. 3558 instruct Repl2L_mem(vecX dst, memory mem) %{ 3559 predicate(n->as_Vector()->length() == 2 && !VM_Version::supports_avx512vlbw()); 3560 match(Set dst (ReplicateL (LoadL mem))); 3561 format %{ "movq $dst,$mem\n\t" 3562 "punpcklqdq $dst,$dst\t! replicate2L" %} 3563 ins_encode %{ 3564 __ movq($dst$$XMMRegister, $mem$$Address); 3565 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3566 %} 3567 ins_pipe( pipe_slow ); 3568 %} 3569 3570 // Replicate long (8 byte) scalar to be vector 3571 #ifdef _LP64 3572 instruct Repl4L(vecY dst, rRegL src) %{ 3573 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3574 match(Set dst (ReplicateL src)); 3575 format %{ "movdq $dst,$src\n\t" 3576 "punpcklqdq $dst,$dst\n\t" 3577 "vinserti128_high $dst,$dst\t! replicate4L" %} 3578 ins_encode %{ 3579 __ movdq($dst$$XMMRegister, $src$$Register); 3580 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3581 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3582 %} 3583 ins_pipe( pipe_slow ); 3584 %} 3585 #else // _LP64 3586 instruct Repl4L(vecY dst, eRegL src, regD tmp) %{ 3587 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3588 match(Set dst (ReplicateL src)); 3589 effect(TEMP dst, USE src, TEMP tmp); 3590 format %{ "movdl $dst,$src.lo\n\t" 3591 "movdl $tmp,$src.hi\n\t" 3592 "punpckldq $dst,$tmp\n\t" 3593 "punpcklqdq $dst,$dst\n\t" 3594 "vinserti128_high $dst,$dst\t! replicate4L" %} 3595 ins_encode %{ 3596 __ movdl($dst$$XMMRegister, $src$$Register); 3597 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3598 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3599 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3600 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3601 %} 3602 ins_pipe( pipe_slow ); 3603 %} 3604 #endif // _LP64 3605 3606 instruct Repl4L_imm(vecY dst, immL con) %{ 3607 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3608 match(Set dst (ReplicateL con)); 3609 format %{ "movq $dst,[$constantaddress]\n\t" 3610 "punpcklqdq $dst,$dst\n\t" 3611 "vinserti128_high $dst,$dst\t! replicate4L($con)" %} 3612 ins_encode %{ 3613 __ movq($dst$$XMMRegister, $constantaddress($con)); 3614 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3615 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3616 %} 3617 ins_pipe( pipe_slow ); 3618 %} 3619 3620 instruct Repl4L_mem(vecY dst, memory mem) %{ 3621 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3622 match(Set dst (ReplicateL (LoadL mem))); 3623 format %{ "movq $dst,$mem\n\t" 3624 "punpcklqdq $dst,$dst\n\t" 3625 "vinserti128_high $dst,$dst\t! 
replicate4L" %} 3626 ins_encode %{ 3627 __ movq($dst$$XMMRegister, $mem$$Address); 3628 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3629 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3630 %} 3631 ins_pipe( pipe_slow ); 3632 %} 3633 3634 instruct Repl2F_mem(vecD dst, memory mem) %{ 3635 predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3636 match(Set dst (ReplicateF (LoadF mem))); 3637 format %{ "pshufd $dst,$mem,0x00\t! replicate2F" %} 3638 ins_encode %{ 3639 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3640 %} 3641 ins_pipe( pipe_slow ); 3642 %} 3643 3644 instruct Repl4F_mem(vecX dst, memory mem) %{ 3645 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3646 match(Set dst (ReplicateF (LoadF mem))); 3647 format %{ "pshufd $dst,$mem,0x00\t! replicate4F" %} 3648 ins_encode %{ 3649 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3650 %} 3651 ins_pipe( pipe_slow ); 3652 %} 3653 3654 instruct Repl8F(vecY dst, regF src) %{ 3655 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3656 match(Set dst (ReplicateF src)); 3657 format %{ "pshufd $dst,$src,0x00\n\t" 3658 "vinsertf128_high $dst,$dst\t! replicate8F" %} 3659 ins_encode %{ 3660 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3661 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3662 %} 3663 ins_pipe( pipe_slow ); 3664 %} 3665 3666 instruct Repl8F_mem(vecY dst, memory mem) %{ 3667 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3668 match(Set dst (ReplicateF (LoadF mem))); 3669 format %{ "pshufd $dst,$mem,0x00\n\t" 3670 "vinsertf128_high $dst,$dst\t! replicate8F" %} 3671 ins_encode %{ 3672 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3673 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3674 %} 3675 ins_pipe( pipe_slow ); 3676 %} 3677 3678 instruct Repl2F_zero(vecD dst, immF0 zero) %{ 3679 predicate(n->as_Vector()->length() == 2 && UseAVX < 3); 3680 match(Set dst (ReplicateF zero)); 3681 format %{ "xorps $dst,$dst\t! replicate2F zero" %} 3682 ins_encode %{ 3683 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 3684 %} 3685 ins_pipe( fpu_reg_reg ); 3686 %} 3687 3688 instruct Repl4F_zero(vecX dst, immF0 zero) %{ 3689 predicate(n->as_Vector()->length() == 4 && UseAVX < 3); 3690 match(Set dst (ReplicateF zero)); 3691 format %{ "xorps $dst,$dst\t! replicate4F zero" %} 3692 ins_encode %{ 3693 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 3694 %} 3695 ins_pipe( fpu_reg_reg ); 3696 %} 3697 3698 instruct Repl8F_zero(vecY dst, immF0 zero) %{ 3699 predicate(n->as_Vector()->length() == 8 && UseAVX < 3); 3700 match(Set dst (ReplicateF zero)); 3701 format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %} 3702 ins_encode %{ 3703 int vector_len = 1; 3704 __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3705 %} 3706 ins_pipe( fpu_reg_reg ); 3707 %} 3708 3709 instruct Repl2D_mem(vecX dst, memory mem) %{ 3710 predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3711 match(Set dst (ReplicateD (LoadD mem))); 3712 format %{ "pshufd $dst,$mem,0x44\t! 
replicate2D" %} 3713 ins_encode %{ 3714 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3715 %} 3716 ins_pipe( pipe_slow ); 3717 %} 3718 3719 instruct Repl4D(vecY dst, regD src) %{ 3720 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3721 match(Set dst (ReplicateD src)); 3722 format %{ "pshufd $dst,$src,0x44\n\t" 3723 "vinsertf128_high $dst,$dst\t! replicate4D" %} 3724 ins_encode %{ 3725 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3726 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3727 %} 3728 ins_pipe( pipe_slow ); 3729 %} 3730 3731 instruct Repl4D_mem(vecY dst, memory mem) %{ 3732 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3733 match(Set dst (ReplicateD (LoadD mem))); 3734 format %{ "pshufd $dst,$mem,0x44\n\t" 3735 "vinsertf128_high $dst,$dst\t! replicate4D" %} 3736 ins_encode %{ 3737 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3738 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3739 %} 3740 ins_pipe( pipe_slow ); 3741 %} 3742 3743 // Replicate double (8 byte) scalar zero to be vector 3744 instruct Repl2D_zero(vecX dst, immD0 zero) %{ 3745 predicate(n->as_Vector()->length() == 2 && UseAVX < 3); 3746 match(Set dst (ReplicateD zero)); 3747 format %{ "xorpd $dst,$dst\t! replicate2D zero" %} 3748 ins_encode %{ 3749 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister); 3750 %} 3751 ins_pipe( fpu_reg_reg ); 3752 %} 3753 3754 instruct Repl4D_zero(vecY dst, immD0 zero) %{ 3755 predicate(n->as_Vector()->length() == 4 && UseAVX < 3); 3756 match(Set dst (ReplicateD zero)); 3757 format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %} 3758 ins_encode %{ 3759 int vector_len = 1; 3760 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3761 %} 3762 ins_pipe( fpu_reg_reg ); 3763 %} 3764 3765 // ====================GENERIC REPLICATE========================================== 3766 3767 // Replicate byte scalar to be vector 3768 instruct Repl4B(vecS dst, rRegI src) %{ 3769 predicate(n->as_Vector()->length() == 4); 3770 match(Set dst (ReplicateB src)); 3771 format %{ "movd $dst,$src\n\t" 3772 "punpcklbw $dst,$dst\n\t" 3773 "pshuflw $dst,$dst,0x00\t! replicate4B" %} 3774 ins_encode %{ 3775 __ movdl($dst$$XMMRegister, $src$$Register); 3776 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3777 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3778 %} 3779 ins_pipe( pipe_slow ); 3780 %} 3781 3782 instruct Repl8B(vecD dst, rRegI src) %{ 3783 predicate(n->as_Vector()->length() == 8); 3784 match(Set dst (ReplicateB src)); 3785 format %{ "movd $dst,$src\n\t" 3786 "punpcklbw $dst,$dst\n\t" 3787 "pshuflw $dst,$dst,0x00\t! replicate8B" %} 3788 ins_encode %{ 3789 __ movdl($dst$$XMMRegister, $src$$Register); 3790 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3791 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3792 %} 3793 ins_pipe( pipe_slow ); 3794 %} 3795 3796 // Replicate byte scalar immediate to be vector by loading from const table. 3797 instruct Repl4B_imm(vecS dst, immI con) %{ 3798 predicate(n->as_Vector()->length() == 4); 3799 match(Set dst (ReplicateB con)); 3800 format %{ "movdl $dst,[$constantaddress]\t! 
replicate4B($con)" %} 3801 ins_encode %{ 3802 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1))); 3803 %} 3804 ins_pipe( pipe_slow ); 3805 %} 3806 3807 instruct Repl8B_imm(vecD dst, immI con) %{ 3808 predicate(n->as_Vector()->length() == 8); 3809 match(Set dst (ReplicateB con)); 3810 format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %} 3811 ins_encode %{ 3812 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3813 %} 3814 ins_pipe( pipe_slow ); 3815 %} 3816 3817 // Replicate byte scalar zero to be vector 3818 instruct Repl4B_zero(vecS dst, immI0 zero) %{ 3819 predicate(n->as_Vector()->length() == 4); 3820 match(Set dst (ReplicateB zero)); 3821 format %{ "pxor $dst,$dst\t! replicate4B zero" %} 3822 ins_encode %{ 3823 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3824 %} 3825 ins_pipe( fpu_reg_reg ); 3826 %} 3827 3828 instruct Repl8B_zero(vecD dst, immI0 zero) %{ 3829 predicate(n->as_Vector()->length() == 8); 3830 match(Set dst (ReplicateB zero)); 3831 format %{ "pxor $dst,$dst\t! replicate8B zero" %} 3832 ins_encode %{ 3833 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3834 %} 3835 ins_pipe( fpu_reg_reg ); 3836 %} 3837 3838 instruct Repl16B_zero(vecX dst, immI0 zero) %{ 3839 predicate(n->as_Vector()->length() == 16); 3840 match(Set dst (ReplicateB zero)); 3841 format %{ "pxor $dst,$dst\t! replicate16B zero" %} 3842 ins_encode %{ 3843 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3844 %} 3845 ins_pipe( fpu_reg_reg ); 3846 %} 3847 3848 instruct Repl32B_zero(vecY dst, immI0 zero) %{ 3849 predicate(n->as_Vector()->length() == 32); 3850 match(Set dst (ReplicateB zero)); 3851 format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %} 3852 ins_encode %{ 3853 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 3854 int vector_len = 1; 3855 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3856 %} 3857 ins_pipe( fpu_reg_reg ); 3858 %} 3859 3860 // Replicate char/short (2 byte) scalar to be vector 3861 instruct Repl2S(vecS dst, rRegI src) %{ 3862 predicate(n->as_Vector()->length() == 2); 3863 match(Set dst (ReplicateS src)); 3864 format %{ "movd $dst,$src\n\t" 3865 "pshuflw $dst,$dst,0x00\t! replicate2S" %} 3866 ins_encode %{ 3867 __ movdl($dst$$XMMRegister, $src$$Register); 3868 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3869 %} 3870 ins_pipe( fpu_reg_reg ); 3871 %} 3872 3873 // Replicate char/short (2 byte) scalar immediate to be vector by loading from const table. 3874 instruct Repl2S_imm(vecS dst, immI con) %{ 3875 predicate(n->as_Vector()->length() == 2); 3876 match(Set dst (ReplicateS con)); 3877 format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %} 3878 ins_encode %{ 3879 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2))); 3880 %} 3881 ins_pipe( fpu_reg_reg ); 3882 %} 3883 3884 instruct Repl4S_imm(vecD dst, immI con) %{ 3885 predicate(n->as_Vector()->length() == 4); 3886 match(Set dst (ReplicateS con)); 3887 format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %} 3888 ins_encode %{ 3889 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3890 %} 3891 ins_pipe( fpu_reg_reg ); 3892 %} 3893 3894 // Replicate char/short (2 byte) scalar zero to be vector 3895 instruct Repl2S_zero(vecS dst, immI0 zero) %{ 3896 predicate(n->as_Vector()->length() == 2); 3897 match(Set dst (ReplicateS zero)); 3898 format %{ "pxor $dst,$dst\t! 
replicate2S zero" %}
3899 ins_encode %{
3900 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3901 %}
3902 ins_pipe( fpu_reg_reg );
3903 %}
3904
3905 instruct Repl4S_zero(vecD dst, immI0 zero) %{
3906 predicate(n->as_Vector()->length() == 4);
3907 match(Set dst (ReplicateS zero));
3908 format %{ "pxor $dst,$dst\t! replicate4S zero" %}
3909 ins_encode %{
3910 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3911 %}
3912 ins_pipe( fpu_reg_reg );
3913 %}
3914
3915 instruct Repl8S_zero(vecX dst, immI0 zero) %{
3916 predicate(n->as_Vector()->length() == 8);
3917 match(Set dst (ReplicateS zero));
3918 format %{ "pxor $dst,$dst\t! replicate8S zero" %}
3919 ins_encode %{
3920 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3921 %}
3922 ins_pipe( fpu_reg_reg );
3923 %}
3924
3925 instruct Repl16S_zero(vecY dst, immI0 zero) %{
3926 predicate(n->as_Vector()->length() == 16);
3927 match(Set dst (ReplicateS zero));
3928 format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %}
3929 ins_encode %{
3930 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
3931 int vector_len = 1;
3932 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3933 %}
3934 ins_pipe( fpu_reg_reg );
3935 %}
3936
3937 // Replicate integer (4 byte) scalar to be vector
3938 instruct Repl2I(vecD dst, rRegI src) %{
3939 predicate(n->as_Vector()->length() == 2);
3940 match(Set dst (ReplicateI src));
3941 format %{ "movd $dst,$src\n\t"
3942 "pshufd $dst,$dst,0x00\t! replicate2I" %}
3943 ins_encode %{
3944 __ movdl($dst$$XMMRegister, $src$$Register);
3945 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3946 %}
3947 ins_pipe( fpu_reg_reg );
3948 %}
3949
3950 // Integer could be loaded into xmm register directly from memory.
3951 instruct Repl2I_mem(vecD dst, memory mem) %{
3952 predicate(n->as_Vector()->length() == 2);
3953 match(Set dst (ReplicateI (LoadI mem)));
3954 format %{ "movd $dst,$mem\n\t"
3955 "pshufd $dst,$dst,0x00\t! replicate2I" %}
3956 ins_encode %{
3957 __ movdl($dst$$XMMRegister, $mem$$Address);
3958 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3959 %}
3960 ins_pipe( fpu_reg_reg );
3961 %}
3962
3963 // Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
3964 instruct Repl2I_imm(vecD dst, immI con) %{
3965 predicate(n->as_Vector()->length() == 2);
3966 match(Set dst (ReplicateI con));
3967 format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %}
3968 ins_encode %{
3969 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
3970 %}
3971 ins_pipe( fpu_reg_reg );
3972 %}
3973
3974 // Replicate integer (4 byte) scalar zero to be vector
3975 instruct Repl2I_zero(vecD dst, immI0 zero) %{
3976 predicate(n->as_Vector()->length() == 2);
3977 match(Set dst (ReplicateI zero));
3978 format %{ "pxor $dst,$dst\t! replicate2I zero" %}
3979 ins_encode %{
3980 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3981 %}
3982 ins_pipe( fpu_reg_reg );
3983 %}
3984
3985 instruct Repl4I_zero(vecX dst, immI0 zero) %{
3986 predicate(n->as_Vector()->length() == 4);
3987 match(Set dst (ReplicateI zero));
3988 format %{ "pxor $dst,$dst\t! replicate4I zero" %}
3989 ins_encode %{
3990 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3991 %}
3992 ins_pipe( fpu_reg_reg );
3993 %}
3994
3995 instruct Repl8I_zero(vecY dst, immI0 zero) %{
3996 predicate(n->as_Vector()->length() == 8);
3997 match(Set dst (ReplicateI zero));
3998 format %{ "vpxor $dst,$dst,$dst\t!
replicate8I zero" %}
3999 ins_encode %{
4000 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
4001 int vector_len = 1;
4002 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4003 %}
4004 ins_pipe( fpu_reg_reg );
4005 %}
4006
4007 // Replicate long (8 byte) scalar to be vector
4008 #ifdef _LP64
4009 instruct Repl2L(vecX dst, rRegL src) %{
4010 predicate(n->as_Vector()->length() == 2);
4011 match(Set dst (ReplicateL src));
4012 format %{ "movdq $dst,$src\n\t"
4013 "punpcklqdq $dst,$dst\t! replicate2L" %}
4014 ins_encode %{
4015 __ movdq($dst$$XMMRegister, $src$$Register);
4016 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
4017 %}
4018 ins_pipe( pipe_slow );
4019 %}
4020 #else // _LP64
4021 instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
4022 predicate(n->as_Vector()->length() == 2);
4023 match(Set dst (ReplicateL src));
4024 effect(TEMP dst, USE src, TEMP tmp);
4025 format %{ "movdl $dst,$src.lo\n\t"
4026 "movdl $tmp,$src.hi\n\t"
4027 "punpckldq $dst,$tmp\n\t"
4028 "punpcklqdq $dst,$dst\t! replicate2L" %}
4029 ins_encode %{
4030 __ movdl($dst$$XMMRegister, $src$$Register);
4031 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
4032 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
4033 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
4034 %}
4035 ins_pipe( pipe_slow );
4036 %}
4037 #endif // _LP64
4038
4039 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
4040 instruct Repl2L_imm(vecX dst, immL con) %{
4041 predicate(n->as_Vector()->length() == 2);
4042 match(Set dst (ReplicateL con));
4043 format %{ "movq $dst,[$constantaddress]\n\t"
4044 "punpcklqdq $dst,$dst\t! replicate2L($con)" %}
4045 ins_encode %{
4046 __ movq($dst$$XMMRegister, $constantaddress($con));
4047 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
4048 %}
4049 ins_pipe( pipe_slow );
4050 %}
4051
4052 // Replicate long (8 byte) scalar zero to be vector
4053 instruct Repl2L_zero(vecX dst, immL0 zero) %{
4054 predicate(n->as_Vector()->length() == 2);
4055 match(Set dst (ReplicateL zero));
4056 format %{ "pxor $dst,$dst\t! replicate2L zero" %}
4057 ins_encode %{
4058 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
4059 %}
4060 ins_pipe( fpu_reg_reg );
4061 %}
4062
4063 instruct Repl4L_zero(vecY dst, immL0 zero) %{
4064 predicate(n->as_Vector()->length() == 4);
4065 match(Set dst (ReplicateL zero));
4066 format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %}
4067 ins_encode %{
4068 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
4069 int vector_len = 1;
4070 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4071 %}
4072 ins_pipe( fpu_reg_reg );
4073 %}
4074
4075 // Replicate float (4 byte) scalar to be vector
4076 instruct Repl2F(vecD dst, regF src) %{
4077 predicate(n->as_Vector()->length() == 2);
4078 match(Set dst (ReplicateF src));
4079 format %{ "pshufd $dst,$src,0x00\t! replicate2F" %}
4080 ins_encode %{
4081 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
4082 %}
4083 ins_pipe( fpu_reg_reg );
4084 %}
4085
4086 instruct Repl4F(vecX dst, regF src) %{
4087 predicate(n->as_Vector()->length() == 4);
4088 match(Set dst (ReplicateF src));
4089 format %{ "pshufd $dst,$src,0x00\t!
replicate4F" %} 4090 ins_encode %{ 4091 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 4092 %} 4093 ins_pipe( pipe_slow ); 4094 %} 4095 4096 // Replicate double (8 bytes) scalar to be vector 4097 instruct Repl2D(vecX dst, regD src) %{ 4098 predicate(n->as_Vector()->length() == 2); 4099 match(Set dst (ReplicateD src)); 4100 format %{ "pshufd $dst,$src,0x44\t! replicate2D" %} 4101 ins_encode %{ 4102 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 4103 %} 4104 ins_pipe( pipe_slow ); 4105 %} 4106 4107 // ====================EVEX REPLICATE============================================= 4108 4109 instruct Repl4B_mem_evex(vecS dst, memory mem) %{ 4110 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw()); 4111 match(Set dst (ReplicateB (LoadB mem))); 4112 format %{ "vpbroadcastb $dst,$mem\t! replicate4B" %} 4113 ins_encode %{ 4114 int vector_len = 0; 4115 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4116 %} 4117 ins_pipe( pipe_slow ); 4118 %} 4119 4120 instruct Repl8B_mem_evex(vecD dst, memory mem) %{ 4121 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 4122 match(Set dst (ReplicateB (LoadB mem))); 4123 format %{ "vpbroadcastb $dst,$mem\t! replicate8B" %} 4124 ins_encode %{ 4125 int vector_len = 0; 4126 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4127 %} 4128 ins_pipe( pipe_slow ); 4129 %} 4130 4131 instruct Repl16B_evex(vecX dst, rRegI src) %{ 4132 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4133 match(Set dst (ReplicateB src)); 4134 format %{ "vpbroadcastb $dst,$src\t! replicate16B" %} 4135 ins_encode %{ 4136 int vector_len = 0; 4137 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 4138 %} 4139 ins_pipe( pipe_slow ); 4140 %} 4141 4142 instruct Repl16B_mem_evex(vecX dst, memory mem) %{ 4143 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4144 match(Set dst (ReplicateB (LoadB mem))); 4145 format %{ "vpbroadcastb $dst,$mem\t! replicate16B" %} 4146 ins_encode %{ 4147 int vector_len = 0; 4148 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4149 %} 4150 ins_pipe( pipe_slow ); 4151 %} 4152 4153 instruct Repl32B_evex(vecY dst, rRegI src) %{ 4154 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); 4155 match(Set dst (ReplicateB src)); 4156 format %{ "vpbroadcastb $dst,$src\t! replicate32B" %} 4157 ins_encode %{ 4158 int vector_len = 1; 4159 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 4160 %} 4161 ins_pipe( pipe_slow ); 4162 %} 4163 4164 instruct Repl32B_mem_evex(vecY dst, memory mem) %{ 4165 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); 4166 match(Set dst (ReplicateB (LoadB mem))); 4167 format %{ "vpbroadcastb $dst,$mem\t! replicate32B" %} 4168 ins_encode %{ 4169 int vector_len = 1; 4170 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4171 %} 4172 ins_pipe( pipe_slow ); 4173 %} 4174 4175 instruct Repl64B_evex(vecZ dst, rRegI src) %{ 4176 predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw()); 4177 match(Set dst (ReplicateB src)); 4178 format %{ "vpbroadcastb $dst,$src\t! 
instruct Repl4B_mem_evex(vecS dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate4B" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B_mem_evex(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate8B" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_evex(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB src));
  format %{ "vpbroadcastb $dst,$src\t! replicate16B" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate16B" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_evex(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB src));
  format %{ "vpbroadcastb $dst,$src\t! replicate32B" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate32B" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_evex(vecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateB src));
  format %{ "vpbroadcastb $dst,$src\t! replicate64B" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate64B" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_imm_evex(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastb $dst,$dst\t! replicate16B" %}
  ins_encode %{
    int vector_len = 0;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_imm_evex(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastb $dst,$dst\t! replicate32B" %}
  ins_encode %{
    int vector_len = 1;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_imm_evex(vecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastb $dst,$dst\t! replicate64B" %}
  ins_encode %{
    int vector_len = 2;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_zero_evex(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 64 && UseAVX > 2);
  match(Set dst (ReplicateB zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate64B zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_evex(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "vpbroadcastw $dst,$src\t! replicate4S" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4S_mem_evex(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw $dst,$mem\t! replicate4S" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
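// Predicate note: byte and word broadcasts are AVX512BW instructions. The
// 512-bit forms therefore check supports_avx512bw(), while the 128/256-bit
// forms also need the AVX512VL extension and check supports_avx512vlbw().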
replicate4S" %} 4263 ins_encode %{ 4264 int vector_len = 0; 4265 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4266 %} 4267 ins_pipe( pipe_slow ); 4268 %} 4269 4270 instruct Repl8S_evex(vecX dst, rRegI src) %{ 4271 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 4272 match(Set dst (ReplicateS src)); 4273 format %{ "vpbroadcastw $dst,$src\t! replicate8S" %} 4274 ins_encode %{ 4275 int vector_len = 0; 4276 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4277 %} 4278 ins_pipe( pipe_slow ); 4279 %} 4280 4281 instruct Repl8S_mem_evex(vecX dst, memory mem) %{ 4282 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 4283 match(Set dst (ReplicateS (LoadS mem))); 4284 format %{ "vpbroadcastw $dst,$mem\t! replicate8S" %} 4285 ins_encode %{ 4286 int vector_len = 0; 4287 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4288 %} 4289 ins_pipe( pipe_slow ); 4290 %} 4291 4292 instruct Repl16S_evex(vecY dst, rRegI src) %{ 4293 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4294 match(Set dst (ReplicateS src)); 4295 format %{ "vpbroadcastw $dst,$src\t! replicate16S" %} 4296 ins_encode %{ 4297 int vector_len = 1; 4298 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4299 %} 4300 ins_pipe( pipe_slow ); 4301 %} 4302 4303 instruct Repl16S_mem_evex(vecY dst, memory mem) %{ 4304 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4305 match(Set dst (ReplicateS (LoadS mem))); 4306 format %{ "vpbroadcastw $dst,$mem\t! replicate16S" %} 4307 ins_encode %{ 4308 int vector_len = 1; 4309 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4310 %} 4311 ins_pipe( pipe_slow ); 4312 %} 4313 4314 instruct Repl32S_evex(vecZ dst, rRegI src) %{ 4315 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw()); 4316 match(Set dst (ReplicateS src)); 4317 format %{ "vpbroadcastw $dst,$src\t! replicate32S" %} 4318 ins_encode %{ 4319 int vector_len = 2; 4320 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4321 %} 4322 ins_pipe( pipe_slow ); 4323 %} 4324 4325 instruct Repl32S_mem_evex(vecZ dst, memory mem) %{ 4326 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw()); 4327 match(Set dst (ReplicateS (LoadS mem))); 4328 format %{ "vpbroadcastw $dst,$mem\t! replicate32S" %} 4329 ins_encode %{ 4330 int vector_len = 2; 4331 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4332 %} 4333 ins_pipe( pipe_slow ); 4334 %} 4335 4336 instruct Repl8S_imm_evex(vecX dst, immI con) %{ 4337 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 4338 match(Set dst (ReplicateS con)); 4339 format %{ "movq $dst,[$constantaddress]\n\t" 4340 "vpbroadcastw $dst,$dst\t! replicate8S" %} 4341 ins_encode %{ 4342 int vector_len = 0; 4343 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4344 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4345 %} 4346 ins_pipe( pipe_slow ); 4347 %} 4348 4349 instruct Repl16S_imm_evex(vecY dst, immI con) %{ 4350 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4351 match(Set dst (ReplicateS con)); 4352 format %{ "movq $dst,[$constantaddress]\n\t" 4353 "vpbroadcastw $dst,$dst\t! 
replicate16S" %} 4354 ins_encode %{ 4355 int vector_len = 1; 4356 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4357 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4358 %} 4359 ins_pipe( pipe_slow ); 4360 %} 4361 4362 instruct Repl32S_imm_evex(vecZ dst, immI con) %{ 4363 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw()); 4364 match(Set dst (ReplicateS con)); 4365 format %{ "movq $dst,[$constantaddress]\n\t" 4366 "vpbroadcastw $dst,$dst\t! replicate32S" %} 4367 ins_encode %{ 4368 int vector_len = 2; 4369 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4370 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4371 %} 4372 ins_pipe( pipe_slow ); 4373 %} 4374 4375 instruct Repl32S_zero_evex(vecZ dst, immI0 zero) %{ 4376 predicate(n->as_Vector()->length() == 32 && UseAVX > 2); 4377 match(Set dst (ReplicateS zero)); 4378 format %{ "vpxor $dst k0,$dst,$dst\t! replicate32S zero" %} 4379 ins_encode %{ 4380 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 4381 int vector_len = 2; 4382 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4383 %} 4384 ins_pipe( fpu_reg_reg ); 4385 %} 4386 4387 instruct Repl4I_evex(vecX dst, rRegI src) %{ 4388 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4389 match(Set dst (ReplicateI src)); 4390 format %{ "vpbroadcastd $dst,$src\t! replicate4I" %} 4391 ins_encode %{ 4392 int vector_len = 0; 4393 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4394 %} 4395 ins_pipe( pipe_slow ); 4396 %} 4397 4398 instruct Repl4I_mem_evex(vecX dst, memory mem) %{ 4399 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4400 match(Set dst (ReplicateI (LoadI mem))); 4401 format %{ "vpbroadcastd $dst,$mem\t! replicate4I" %} 4402 ins_encode %{ 4403 int vector_len = 0; 4404 __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4405 %} 4406 ins_pipe( pipe_slow ); 4407 %} 4408 4409 instruct Repl8I_evex(vecY dst, rRegI src) %{ 4410 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4411 match(Set dst (ReplicateI src)); 4412 format %{ "vpbroadcastd $dst,$src\t! replicate8I" %} 4413 ins_encode %{ 4414 int vector_len = 1; 4415 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4416 %} 4417 ins_pipe( pipe_slow ); 4418 %} 4419 4420 instruct Repl8I_mem_evex(vecY dst, memory mem) %{ 4421 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4422 match(Set dst (ReplicateI (LoadI mem))); 4423 format %{ "vpbroadcastd $dst,$mem\t! replicate8I" %} 4424 ins_encode %{ 4425 int vector_len = 1; 4426 __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4427 %} 4428 ins_pipe( pipe_slow ); 4429 %} 4430 4431 instruct Repl16I_evex(vecZ dst, rRegI src) %{ 4432 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4433 match(Set dst (ReplicateI src)); 4434 format %{ "vpbroadcastd $dst,$src\t! replicate16I" %} 4435 ins_encode %{ 4436 int vector_len = 2; 4437 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4438 %} 4439 ins_pipe( pipe_slow ); 4440 %} 4441 4442 instruct Repl16I_mem_evex(vecZ dst, memory mem) %{ 4443 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4444 match(Set dst (ReplicateI (LoadI mem))); 4445 format %{ "vpbroadcastd $dst,$mem\t! 
replicate16I" %} 4446 ins_encode %{ 4447 int vector_len = 2; 4448 __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4449 %} 4450 ins_pipe( pipe_slow ); 4451 %} 4452 4453 instruct Repl4I_imm_evex(vecX dst, immI con) %{ 4454 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4455 match(Set dst (ReplicateI con)); 4456 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 4457 "vpbroadcastd $dst,$dst\t! replicate4I" %} 4458 ins_encode %{ 4459 int vector_len = 0; 4460 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4461 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4462 %} 4463 ins_pipe( pipe_slow ); 4464 %} 4465 4466 instruct Repl8I_imm_evex(vecY dst, immI con) %{ 4467 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4468 match(Set dst (ReplicateI con)); 4469 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 4470 "vpbroadcastd $dst,$dst\t! replicate8I" %} 4471 ins_encode %{ 4472 int vector_len = 1; 4473 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4474 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4475 %} 4476 ins_pipe( pipe_slow ); 4477 %} 4478 4479 instruct Repl16I_imm_evex(vecZ dst, immI con) %{ 4480 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4481 match(Set dst (ReplicateI con)); 4482 format %{ "movq $dst,[$constantaddress]\t! replicate16I($con)\n\t" 4483 "vpbroadcastd $dst,$dst\t! replicate16I" %} 4484 ins_encode %{ 4485 int vector_len = 2; 4486 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4487 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4488 %} 4489 ins_pipe( pipe_slow ); 4490 %} 4491 4492 instruct Repl16I_zero_evex(vecZ dst, immI0 zero) %{ 4493 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4494 match(Set dst (ReplicateI zero)); 4495 format %{ "vpxor $dst k0,$dst,$dst\t! replicate16I zero" %} 4496 ins_encode %{ 4497 // Use vxorpd since AVX does not have vpxor for 512-bit (AVX2 will have it). 4498 int vector_len = 2; 4499 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4500 %} 4501 ins_pipe( fpu_reg_reg ); 4502 %} 4503 4504 // Replicate long (8 byte) scalar to be vector 4505 #ifdef _LP64 4506 instruct Repl4L_evex(vecY dst, rRegL src) %{ 4507 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4508 match(Set dst (ReplicateL src)); 4509 format %{ "vpbroadcastq $dst,$src\t! replicate4L" %} 4510 ins_encode %{ 4511 int vector_len = 1; 4512 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); 4513 %} 4514 ins_pipe( pipe_slow ); 4515 %} 4516 4517 instruct Repl8L_evex(vecZ dst, rRegL src) %{ 4518 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4519 match(Set dst (ReplicateL src)); 4520 format %{ "vpbroadcastq $dst,$src\t! replicate8L" %} 4521 ins_encode %{ 4522 int vector_len = 2; 4523 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); 4524 %} 4525 ins_pipe( pipe_slow ); 4526 %} 4527 #else // _LP64 4528 instruct Repl4L_evex(vecY dst, eRegL src, regD tmp) %{ 4529 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4530 match(Set dst (ReplicateL src)); 4531 effect(TEMP dst, USE src, TEMP tmp); 4532 format %{ "movdl $dst,$src.lo\n\t" 4533 "movdl $tmp,$src.hi\n\t" 4534 "punpckldq $dst,$tmp\n\t" 4535 "vpbroadcastq $dst,$dst\t! 
replicate4L" %} 4536 ins_encode %{ 4537 int vector_len = 1; 4538 __ movdl($dst$$XMMRegister, $src$$Register); 4539 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4540 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4541 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4542 %} 4543 ins_pipe( pipe_slow ); 4544 %} 4545 4546 instruct Repl8L_evex(vecZ dst, eRegL src, regD tmp) %{ 4547 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4548 match(Set dst (ReplicateL src)); 4549 effect(TEMP dst, USE src, TEMP tmp); 4550 format %{ "movdl $dst,$src.lo\n\t" 4551 "movdl $tmp,$src.hi\n\t" 4552 "punpckldq $dst,$tmp\n\t" 4553 "vpbroadcastq $dst,$dst\t! replicate8L" %} 4554 ins_encode %{ 4555 int vector_len = 2; 4556 __ movdl($dst$$XMMRegister, $src$$Register); 4557 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4558 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4559 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4560 %} 4561 ins_pipe( pipe_slow ); 4562 %} 4563 #endif // _LP64 4564 4565 instruct Repl4L_imm_evex(vecY dst, immL con) %{ 4566 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4567 match(Set dst (ReplicateL con)); 4568 format %{ "movq $dst,[$constantaddress]\n\t" 4569 "vpbroadcastq $dst,$dst\t! replicate4L" %} 4570 ins_encode %{ 4571 int vector_len = 1; 4572 __ movq($dst$$XMMRegister, $constantaddress($con)); 4573 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4574 %} 4575 ins_pipe( pipe_slow ); 4576 %} 4577 4578 instruct Repl8L_imm_evex(vecZ dst, immL con) %{ 4579 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4580 match(Set dst (ReplicateL con)); 4581 format %{ "movq $dst,[$constantaddress]\n\t" 4582 "vpbroadcastq $dst,$dst\t! replicate8L" %} 4583 ins_encode %{ 4584 int vector_len = 2; 4585 __ movq($dst$$XMMRegister, $constantaddress($con)); 4586 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4587 %} 4588 ins_pipe( pipe_slow ); 4589 %} 4590 4591 instruct Repl2L_mem_evex(vecX dst, memory mem) %{ 4592 predicate(n->as_Vector()->length() == 2 && VM_Version::supports_avx512vl()); 4593 match(Set dst (ReplicateL (LoadL mem))); 4594 format %{ "vpbroadcastd $dst,$mem\t! replicate2L" %} 4595 ins_encode %{ 4596 int vector_len = 0; 4597 __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4598 %} 4599 ins_pipe( pipe_slow ); 4600 %} 4601 4602 instruct Repl4L_mem_evex(vecY dst, memory mem) %{ 4603 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4604 match(Set dst (ReplicateL (LoadL mem))); 4605 format %{ "vpbroadcastd $dst,$mem\t! replicate4L" %} 4606 ins_encode %{ 4607 int vector_len = 1; 4608 __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4609 %} 4610 ins_pipe( pipe_slow ); 4611 %} 4612 4613 instruct Repl8L_mem_evex(vecZ dst, memory mem) %{ 4614 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4615 match(Set dst (ReplicateL (LoadL mem))); 4616 format %{ "vpbroadcastd $dst,$mem\t! replicate8L" %} 4617 ins_encode %{ 4618 int vector_len = 2; 4619 __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4620 %} 4621 ins_pipe( pipe_slow ); 4622 %} 4623 4624 instruct Repl8L_zero_evex(vecZ dst, immL0 zero) %{ 4625 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4626 match(Set dst (ReplicateL zero)); 4627 format %{ "vpxor $dst k0,$dst,$dst\t! replicate8L zero" %} 4628 ins_encode %{ 4629 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 
instruct Repl8F_evex(vecY dst, regF src) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF src));
  format %{ "vbroadcastss $dst,$src\t! replicate8F" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8F_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "vbroadcastss $dst,$mem\t! replicate8F" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16F_evex(vecZ dst, regF src) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateF src));
  format %{ "vbroadcastss $dst,$src\t! replicate16F" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16F_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "vbroadcastss $dst,$mem\t! replicate16F" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl2F_zero_evex(vecD dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate2F zero" %}
  ins_encode %{
    // Use vpxor in place of vxorps since EVEX has a constraint on dq for vxorps: this is a 512-bit operation
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F_zero_evex(vecX dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate4F zero" %}
  ins_encode %{
    // Use vpxor in place of vxorps since EVEX has a constraint on dq for vxorps: this is a 512-bit operation
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8F_zero_evex(vecY dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate8F zero" %}
  ins_encode %{
    // Use vpxor in place of vxorps since EVEX has a constraint on dq for vxorps: this is a 512-bit operation
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16F_zero_evex(vecZ dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate16F zero" %}
  ins_encode %{
    // Use vpxor in place of vxorps since EVEX has a constraint on dq for vxorps: this is a 512-bit operation
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}
replicate16F zero" %} 4720 ins_encode %{ 4721 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4722 int vector_len = 2; 4723 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4724 %} 4725 ins_pipe( fpu_reg_reg ); 4726 %} 4727 4728 instruct Repl4D_evex(vecY dst, regD src) %{ 4729 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4730 match(Set dst (ReplicateD src)); 4731 format %{ "vbroadcastsd $dst,$src\t! replicate4D" %} 4732 ins_encode %{ 4733 int vector_len = 1; 4734 __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4735 %} 4736 ins_pipe( pipe_slow ); 4737 %} 4738 4739 instruct Repl4D_mem_evex(vecY dst, memory mem) %{ 4740 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4741 match(Set dst (ReplicateD (LoadD mem))); 4742 format %{ "vbroadcastsd $dst,$mem\t! replicate4D" %} 4743 ins_encode %{ 4744 int vector_len = 1; 4745 __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); 4746 %} 4747 ins_pipe( pipe_slow ); 4748 %} 4749 4750 instruct Repl8D_evex(vecZ dst, regD src) %{ 4751 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4752 match(Set dst (ReplicateD src)); 4753 format %{ "vbroadcastsd $dst,$src\t! replicate8D" %} 4754 ins_encode %{ 4755 int vector_len = 2; 4756 __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4757 %} 4758 ins_pipe( pipe_slow ); 4759 %} 4760 4761 instruct Repl8D_mem_evex(vecZ dst, memory mem) %{ 4762 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4763 match(Set dst (ReplicateD (LoadD mem))); 4764 format %{ "vbroadcastsd $dst,$mem\t! replicate8D" %} 4765 ins_encode %{ 4766 int vector_len = 2; 4767 __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); 4768 %} 4769 ins_pipe( pipe_slow ); 4770 %} 4771 4772 instruct Repl2D_zero_evex(vecX dst, immD0 zero) %{ 4773 predicate(n->as_Vector()->length() == 2 && UseAVX > 2); 4774 match(Set dst (ReplicateD zero)); 4775 format %{ "vpxor $dst k0,$dst,$dst\t! replicate2D zero" %} 4776 ins_encode %{ 4777 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4778 int vector_len = 2; 4779 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4780 %} 4781 ins_pipe( fpu_reg_reg ); 4782 %} 4783 4784 instruct Repl4D_zero_evex(vecY dst, immD0 zero) %{ 4785 predicate(n->as_Vector()->length() == 4 && UseAVX > 2); 4786 match(Set dst (ReplicateD zero)); 4787 format %{ "vpxor $dst k0,$dst,$dst\t! replicate4D zero" %} 4788 ins_encode %{ 4789 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4790 int vector_len = 2; 4791 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4792 %} 4793 ins_pipe( fpu_reg_reg ); 4794 %} 4795 4796 instruct Repl8D_zero_evex(vecZ dst, immD0 zero) %{ 4797 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4798 match(Set dst (ReplicateD zero)); 4799 format %{ "vpxor $dst k0,$dst,$dst,vect512\t! 
replicate8D zero" %} 4800 ins_encode %{ 4801 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4802 int vector_len = 2; 4803 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4804 %} 4805 ins_pipe( fpu_reg_reg ); 4806 %} 4807 4808 // ====================REDUCTION ARITHMETIC======================================= 4809 4810 instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4811 predicate(UseSSE > 2 && UseAVX == 0); 4812 match(Set dst (AddReductionVI src1 src2)); 4813 effect(TEMP tmp2, TEMP tmp); 4814 format %{ "movdqu $tmp2,$src2\n\t" 4815 "phaddd $tmp2,$tmp2\n\t" 4816 "movd $tmp,$src1\n\t" 4817 "paddd $tmp,$tmp2\n\t" 4818 "movd $dst,$tmp\t! add reduction2I" %} 4819 ins_encode %{ 4820 __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister); 4821 __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); 4822 __ movdl($tmp$$XMMRegister, $src1$$Register); 4823 __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister); 4824 __ movdl($dst$$Register, $tmp$$XMMRegister); 4825 %} 4826 ins_pipe( pipe_slow ); 4827 %} 4828 4829 instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4830 predicate(VM_Version::supports_avxonly()); 4831 match(Set dst (AddReductionVI src1 src2)); 4832 effect(TEMP tmp, TEMP tmp2); 4833 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4834 "movd $tmp2,$src1\n\t" 4835 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4836 "movd $dst,$tmp2\t! add reduction2I" %} 4837 ins_encode %{ 4838 int vector_len = 0; 4839 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4840 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4841 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 4842 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4843 %} 4844 ins_pipe( pipe_slow ); 4845 %} 4846 4847 instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4848 predicate(UseAVX > 2); 4849 match(Set dst (AddReductionVI src1 src2)); 4850 effect(TEMP tmp, TEMP tmp2); 4851 format %{ "pshufd $tmp2,$src2,0x1\n\t" 4852 "vpaddd $tmp,$src2,$tmp2\n\t" 4853 "movd $tmp2,$src1\n\t" 4854 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4855 "movd $dst,$tmp2\t! add reduction2I" %} 4856 ins_encode %{ 4857 int vector_len = 0; 4858 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 4859 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4860 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4861 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4862 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4863 %} 4864 ins_pipe( pipe_slow ); 4865 %} 4866 4867 instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4868 predicate(UseSSE > 2 && UseAVX == 0); 4869 match(Set dst (AddReductionVI src1 src2)); 4870 effect(TEMP tmp, TEMP tmp2); 4871 format %{ "movdqu $tmp,$src2\n\t" 4872 "phaddd $tmp,$tmp\n\t" 4873 "phaddd $tmp,$tmp\n\t" 4874 "movd $tmp2,$src1\n\t" 4875 "paddd $tmp2,$tmp\n\t" 4876 "movd $dst,$tmp2\t! 
add reduction4I" %} 4877 ins_encode %{ 4878 __ movdqu($tmp$$XMMRegister, $src2$$XMMRegister); 4879 __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); 4880 __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); 4881 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4882 __ paddd($tmp2$$XMMRegister, $tmp$$XMMRegister); 4883 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4884 %} 4885 ins_pipe( pipe_slow ); 4886 %} 4887 4888 instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4889 predicate(VM_Version::supports_avxonly()); 4890 match(Set dst (AddReductionVI src1 src2)); 4891 effect(TEMP tmp, TEMP tmp2); 4892 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4893 "vphaddd $tmp,$tmp,$tmp\n\t" 4894 "movd $tmp2,$src1\n\t" 4895 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4896 "movd $dst,$tmp2\t! add reduction4I" %} 4897 ins_encode %{ 4898 int vector_len = 0; 4899 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4900 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 4901 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4902 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 4903 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4904 %} 4905 ins_pipe( pipe_slow ); 4906 %} 4907 4908 instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4909 predicate(UseAVX > 2); 4910 match(Set dst (AddReductionVI src1 src2)); 4911 effect(TEMP tmp, TEMP tmp2); 4912 format %{ "pshufd $tmp2,$src2,0xE\n\t" 4913 "vpaddd $tmp,$src2,$tmp2\n\t" 4914 "pshufd $tmp2,$tmp,0x1\n\t" 4915 "vpaddd $tmp,$tmp,$tmp2\n\t" 4916 "movd $tmp2,$src1\n\t" 4917 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4918 "movd $dst,$tmp2\t! add reduction4I" %} 4919 ins_encode %{ 4920 int vector_len = 0; 4921 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 4922 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4923 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4924 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4925 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4926 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4927 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4928 %} 4929 ins_pipe( pipe_slow ); 4930 %} 4931 4932 instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 4933 predicate(VM_Version::supports_avxonly()); 4934 match(Set dst (AddReductionVI src1 src2)); 4935 effect(TEMP tmp, TEMP tmp2); 4936 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4937 "vphaddd $tmp,$tmp,$tmp2\n\t" 4938 "vextracti128_high $tmp2,$tmp\n\t" 4939 "vpaddd $tmp,$tmp,$tmp2\n\t" 4940 "movd $tmp2,$src1\n\t" 4941 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4942 "movd $dst,$tmp2\t! 
add reduction8I" %} 4943 ins_encode %{ 4944 int vector_len = 1; 4945 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4946 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4947 __ vextracti128_high($tmp2$$XMMRegister, $tmp$$XMMRegister); 4948 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4949 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4950 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4951 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4952 %} 4953 ins_pipe( pipe_slow ); 4954 %} 4955 4956 instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 4957 predicate(UseAVX > 2); 4958 match(Set dst (AddReductionVI src1 src2)); 4959 effect(TEMP tmp, TEMP tmp2); 4960 format %{ "vextracti128_high $tmp,$src2\n\t" 4961 "vpaddd $tmp,$tmp,$src2\n\t" 4962 "pshufd $tmp2,$tmp,0xE\n\t" 4963 "vpaddd $tmp,$tmp,$tmp2\n\t" 4964 "pshufd $tmp2,$tmp,0x1\n\t" 4965 "vpaddd $tmp,$tmp,$tmp2\n\t" 4966 "movd $tmp2,$src1\n\t" 4967 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4968 "movd $dst,$tmp2\t! add reduction8I" %} 4969 ins_encode %{ 4970 int vector_len = 0; 4971 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 4972 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 4973 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 4974 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4975 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4976 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4977 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4978 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4979 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4980 %} 4981 ins_pipe( pipe_slow ); 4982 %} 4983 4984 instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 4985 predicate(UseAVX > 2); 4986 match(Set dst (AddReductionVI src1 src2)); 4987 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 4988 format %{ "vextracti64x4_high $tmp3,$src2\n\t" 4989 "vpaddd $tmp3,$tmp3,$src2\n\t" 4990 "vextracti128_high $tmp,$tmp3\n\t" 4991 "vpaddd $tmp,$tmp,$tmp3\n\t" 4992 "pshufd $tmp2,$tmp,0xE\n\t" 4993 "vpaddd $tmp,$tmp,$tmp2\n\t" 4994 "pshufd $tmp2,$tmp,0x1\n\t" 4995 "vpaddd $tmp,$tmp,$tmp2\n\t" 4996 "movd $tmp2,$src1\n\t" 4997 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4998 "movd $dst,$tmp2\t! 
mul reduction16I" %} 4999 ins_encode %{ 5000 __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); 5001 __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 5002 __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); 5003 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 5004 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5005 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5006 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5007 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5008 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5009 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5010 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5011 %} 5012 ins_pipe( pipe_slow ); 5013 %} 5014 5015 #ifdef _LP64 5016 instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ 5017 predicate(UseAVX > 2); 5018 match(Set dst (AddReductionVL src1 src2)); 5019 effect(TEMP tmp, TEMP tmp2); 5020 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5021 "vpaddq $tmp,$src2,$tmp2\n\t" 5022 "movdq $tmp2,$src1\n\t" 5023 "vpaddq $tmp2,$tmp,$tmp2\n\t" 5024 "movdq $dst,$tmp2\t! add reduction2L" %} 5025 ins_encode %{ 5026 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5027 __ vpaddq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 5028 __ movdq($tmp2$$XMMRegister, $src1$$Register); 5029 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5030 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5031 %} 5032 ins_pipe( pipe_slow ); 5033 %} 5034 5035 instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ 5036 predicate(UseAVX > 2); 5037 match(Set dst (AddReductionVL src1 src2)); 5038 effect(TEMP tmp, TEMP tmp2); 5039 format %{ "vextracti128_high $tmp,$src2\n\t" 5040 "vpaddq $tmp2,$tmp,$src2\n\t" 5041 "pshufd $tmp,$tmp2,0xE\n\t" 5042 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5043 "movdq $tmp,$src1\n\t" 5044 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5045 "movdq $dst,$tmp2\t! add reduction4L" %} 5046 ins_encode %{ 5047 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5048 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 5049 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5050 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5051 __ movdq($tmp$$XMMRegister, $src1$$Register); 5052 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5053 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5054 %} 5055 ins_pipe( pipe_slow ); 5056 %} 5057 5058 instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ 5059 predicate(UseAVX > 2); 5060 match(Set dst (AddReductionVL src1 src2)); 5061 effect(TEMP tmp, TEMP tmp2); 5062 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 5063 "vpaddq $tmp2,$tmp2,$src2\n\t" 5064 "vextracti128_high $tmp,$tmp2\n\t" 5065 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5066 "pshufd $tmp,$tmp2,0xE\n\t" 5067 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5068 "movdq $tmp,$src1\n\t" 5069 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5070 "movdq $dst,$tmp2\t! 
add reduction8L" %} 5071 ins_encode %{ 5072 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5073 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 5074 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 5075 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5076 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5077 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5078 __ movdq($tmp$$XMMRegister, $src1$$Register); 5079 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5080 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5081 %} 5082 ins_pipe( pipe_slow ); 5083 %} 5084 #endif 5085 5086 instruct rsadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 5087 predicate(UseSSE >= 1 && UseAVX == 0); 5088 match(Set dst (AddReductionVF dst src2)); 5089 effect(TEMP dst, TEMP tmp); 5090 format %{ "addss $dst,$src2\n\t" 5091 "pshufd $tmp,$src2,0x01\n\t" 5092 "addss $dst,$tmp\t! add reduction2F" %} 5093 ins_encode %{ 5094 __ addss($dst$$XMMRegister, $src2$$XMMRegister); 5095 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5096 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 5097 %} 5098 ins_pipe( pipe_slow ); 5099 %} 5100 5101 instruct rvadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 5102 predicate(UseAVX > 0); 5103 match(Set dst (AddReductionVF dst src2)); 5104 effect(TEMP dst, TEMP tmp); 5105 format %{ "vaddss $dst,$dst,$src2\n\t" 5106 "pshufd $tmp,$src2,0x01\n\t" 5107 "vaddss $dst,$dst,$tmp\t! add reduction2F" %} 5108 ins_encode %{ 5109 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5110 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5111 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5112 %} 5113 ins_pipe( pipe_slow ); 5114 %} 5115 5116 instruct rsadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 5117 predicate(UseSSE >= 1 && UseAVX == 0); 5118 match(Set dst (AddReductionVF dst src2)); 5119 effect(TEMP dst, TEMP tmp); 5120 format %{ "addss $dst,$src2\n\t" 5121 "pshufd $tmp,$src2,0x01\n\t" 5122 "addss $dst,$tmp\n\t" 5123 "pshufd $tmp,$src2,0x02\n\t" 5124 "addss $dst,$tmp\n\t" 5125 "pshufd $tmp,$src2,0x03\n\t" 5126 "addss $dst,$tmp\t! add reduction4F" %} 5127 ins_encode %{ 5128 __ addss($dst$$XMMRegister, $src2$$XMMRegister); 5129 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5130 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 5131 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5132 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 5133 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5134 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 5135 %} 5136 ins_pipe( pipe_slow ); 5137 %} 5138 5139 instruct rvadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 5140 predicate(UseAVX > 0); 5141 match(Set dst (AddReductionVF dst src2)); 5142 effect(TEMP tmp, TEMP dst); 5143 format %{ "vaddss $dst,dst,$src2\n\t" 5144 "pshufd $tmp,$src2,0x01\n\t" 5145 "vaddss $dst,$dst,$tmp\n\t" 5146 "pshufd $tmp,$src2,0x02\n\t" 5147 "vaddss $dst,$dst,$tmp\n\t" 5148 "pshufd $tmp,$src2,0x03\n\t" 5149 "vaddss $dst,$dst,$tmp\t! 
add reduction4F" %} 5150 ins_encode %{ 5151 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5152 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5153 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5154 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5155 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5156 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5157 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5158 %} 5159 ins_pipe( pipe_slow ); 5160 %} 5161 5162 instruct radd8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{ 5163 predicate(UseAVX > 0); 5164 match(Set dst (AddReductionVF dst src2)); 5165 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5166 format %{ "vaddss $dst,$dst,$src2\n\t" 5167 "pshufd $tmp,$src2,0x01\n\t" 5168 "vaddss $dst,$dst,$tmp\n\t" 5169 "pshufd $tmp,$src2,0x02\n\t" 5170 "vaddss $dst,$dst,$tmp\n\t" 5171 "pshufd $tmp,$src2,0x03\n\t" 5172 "vaddss $dst,$dst,$tmp\n\t" 5173 "vextractf128_high $tmp2,$src2\n\t" 5174 "vaddss $dst,$dst,$tmp2\n\t" 5175 "pshufd $tmp,$tmp2,0x01\n\t" 5176 "vaddss $dst,$dst,$tmp\n\t" 5177 "pshufd $tmp,$tmp2,0x02\n\t" 5178 "vaddss $dst,$dst,$tmp\n\t" 5179 "pshufd $tmp,$tmp2,0x03\n\t" 5180 "vaddss $dst,$dst,$tmp\t! add reduction8F" %} 5181 ins_encode %{ 5182 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5183 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5184 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5185 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5186 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5187 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5188 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5189 __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5190 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5191 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5192 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5193 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5194 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5195 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5196 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5197 %} 5198 ins_pipe( pipe_slow ); 5199 %} 5200 5201 instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{ 5202 predicate(UseAVX > 2); 5203 match(Set dst (AddReductionVF dst src2)); 5204 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5205 format %{ "vaddss $dst,$dst,$src2\n\t" 5206 "pshufd $tmp,$src2,0x01\n\t" 5207 "vaddss $dst,$dst,$tmp\n\t" 5208 "pshufd $tmp,$src2,0x02\n\t" 5209 "vaddss $dst,$dst,$tmp\n\t" 5210 "pshufd $tmp,$src2,0x03\n\t" 5211 "vaddss $dst,$dst,$tmp\n\t" 5212 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5213 "vaddss $dst,$dst,$tmp2\n\t" 5214 "pshufd $tmp,$tmp2,0x01\n\t" 5215 "vaddss $dst,$dst,$tmp\n\t" 5216 "pshufd $tmp,$tmp2,0x02\n\t" 5217 "vaddss $dst,$dst,$tmp\n\t" 5218 "pshufd $tmp,$tmp2,0x03\n\t" 5219 "vaddss $dst,$dst,$tmp\n\t" 5220 "vextractf32x4 $tmp2,$src2,0x2\n\t" 5221 "vaddss $dst,$dst,$tmp2\n\t" 5222 "pshufd $tmp,$tmp2,0x01\n\t" 5223 "vaddss $dst,$dst,$tmp\n\t" 5224 "pshufd $tmp,$tmp2,0x02\n\t" 5225 "vaddss $dst,$dst,$tmp\n\t" 5226 "pshufd $tmp,$tmp2,0x03\n\t" 5227 "vaddss $dst,$dst,$tmp\n\t" 5228 "vextractf32x4 $tmp2,$src2,0x3\n\t" 5229 "vaddss $dst,$dst,$tmp2\n\t" 5230 "pshufd $tmp,$tmp2,0x01\n\t" 5231 "vaddss $dst,$dst,$tmp\n\t" 5232 "pshufd 
instruct rsadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (AddReductionVD dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "addsd $dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "addsd $dst,$tmp\t! add reduction2D" %}
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ addsd($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVD dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vaddsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vaddsd $dst,$dst,$tmp\t! add reduction2D" %}
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
add reduction2D" %} 5294 ins_encode %{ 5295 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5296 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5297 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5298 %} 5299 ins_pipe( pipe_slow ); 5300 %} 5301 5302 instruct rvadd4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{ 5303 predicate(UseAVX > 0); 5304 match(Set dst (AddReductionVD dst src2)); 5305 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5306 format %{ "vaddsd $dst,$dst,$src2\n\t" 5307 "pshufd $tmp,$src2,0xE\n\t" 5308 "vaddsd $dst,$dst,$tmp\n\t" 5309 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5310 "vaddsd $dst,$dst,$tmp2\n\t" 5311 "pshufd $tmp,$tmp2,0xE\n\t" 5312 "vaddsd $dst,$dst,$tmp\t! add reduction4D" %} 5313 ins_encode %{ 5314 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5315 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5316 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5317 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5318 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5319 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5320 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5321 %} 5322 ins_pipe( pipe_slow ); 5323 %} 5324 5325 instruct rvadd8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{ 5326 predicate(UseAVX > 2); 5327 match(Set dst (AddReductionVD dst src2)); 5328 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5329 format %{ "vaddsd $dst,$dst,$src2\n\t" 5330 "pshufd $tmp,$src2,0xE\n\t" 5331 "vaddsd $dst,$dst,$tmp\n\t" 5332 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5333 "vaddsd $dst,$dst,$tmp2\n\t" 5334 "pshufd $tmp,$tmp2,0xE\n\t" 5335 "vaddsd $dst,$dst,$tmp\n\t" 5336 "vextractf32x4 $tmp2,$src2,0x2\n\t" 5337 "vaddsd $dst,$dst,$tmp2\n\t" 5338 "pshufd $tmp,$tmp2,0xE\n\t" 5339 "vaddsd $dst,$dst,$tmp\n\t" 5340 "vextractf32x4 $tmp2,$src2,0x3\n\t" 5341 "vaddsd $dst,$dst,$tmp2\n\t" 5342 "pshufd $tmp,$tmp2,0xE\n\t" 5343 "vaddsd $dst,$dst,$tmp\t! add reduction8D" %} 5344 ins_encode %{ 5345 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5346 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5347 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5348 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5349 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5350 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5351 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5352 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 5353 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5354 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5355 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5356 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 5357 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5358 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5359 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5360 %} 5361 ins_pipe( pipe_slow ); 5362 %} 5363 5364 instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 5365 predicate(UseSSE > 3 && UseAVX == 0); 5366 match(Set dst (MulReductionVI src1 src2)); 5367 effect(TEMP tmp, TEMP tmp2); 5368 format %{ "pshufd $tmp2,$src2,0x1\n\t" 5369 "pmulld $tmp2,$src2\n\t" 5370 "movd $tmp,$src1\n\t" 5371 "pmulld $tmp2,$tmp\n\t" 5372 "movd $dst,$tmp2\t! 
mul reduction2I" %} 5373 ins_encode %{ 5374 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5375 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 5376 __ movdl($tmp$$XMMRegister, $src1$$Register); 5377 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5378 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5379 %} 5380 ins_pipe( pipe_slow ); 5381 %} 5382 5383 instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 5384 predicate(UseAVX > 0); 5385 match(Set dst (MulReductionVI src1 src2)); 5386 effect(TEMP tmp, TEMP tmp2); 5387 format %{ "pshufd $tmp2,$src2,0x1\n\t" 5388 "vpmulld $tmp,$src2,$tmp2\n\t" 5389 "movd $tmp2,$src1\n\t" 5390 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5391 "movd $dst,$tmp2\t! mul reduction2I" %} 5392 ins_encode %{ 5393 int vector_len = 0; 5394 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5395 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5396 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5397 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5398 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5399 %} 5400 ins_pipe( pipe_slow ); 5401 %} 5402 5403 instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 5404 predicate(UseSSE > 3 && UseAVX == 0); 5405 match(Set dst (MulReductionVI src1 src2)); 5406 effect(TEMP tmp, TEMP tmp2); 5407 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5408 "pmulld $tmp2,$src2\n\t" 5409 "pshufd $tmp,$tmp2,0x1\n\t" 5410 "pmulld $tmp2,$tmp\n\t" 5411 "movd $tmp,$src1\n\t" 5412 "pmulld $tmp2,$tmp\n\t" 5413 "movd $dst,$tmp2\t! mul reduction4I" %} 5414 ins_encode %{ 5415 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5416 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 5417 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1); 5418 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5419 __ movdl($tmp$$XMMRegister, $src1$$Register); 5420 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5421 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5422 %} 5423 ins_pipe( pipe_slow ); 5424 %} 5425 5426 instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 5427 predicate(UseAVX > 0); 5428 match(Set dst (MulReductionVI src1 src2)); 5429 effect(TEMP tmp, TEMP tmp2); 5430 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5431 "vpmulld $tmp,$src2,$tmp2\n\t" 5432 "pshufd $tmp2,$tmp,0x1\n\t" 5433 "vpmulld $tmp,$tmp,$tmp2\n\t" 5434 "movd $tmp2,$src1\n\t" 5435 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5436 "movd $dst,$tmp2\t! 
mul reduction4I" %} 5437 ins_encode %{ 5438 int vector_len = 0; 5439 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5440 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5441 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5442 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5443 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5444 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5445 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5446 %} 5447 ins_pipe( pipe_slow ); 5448 %} 5449 5450 instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 5451 predicate(UseAVX > 0); 5452 match(Set dst (MulReductionVI src1 src2)); 5453 effect(TEMP tmp, TEMP tmp2); 5454 format %{ "vextracti128_high $tmp,$src2\n\t" 5455 "vpmulld $tmp,$tmp,$src2\n\t" 5456 "pshufd $tmp2,$tmp,0xE\n\t" 5457 "vpmulld $tmp,$tmp,$tmp2\n\t" 5458 "pshufd $tmp2,$tmp,0x1\n\t" 5459 "vpmulld $tmp,$tmp,$tmp2\n\t" 5460 "movd $tmp2,$src1\n\t" 5461 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5462 "movd $dst,$tmp2\t! mul reduction8I" %} 5463 ins_encode %{ 5464 int vector_len = 0; 5465 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5466 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 5467 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5468 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5469 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5470 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5471 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5472 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5473 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5474 %} 5475 ins_pipe( pipe_slow ); 5476 %} 5477 5478 instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 5479 predicate(UseAVX > 2); 5480 match(Set dst (MulReductionVI src1 src2)); 5481 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5482 format %{ "vextracti64x4_high $tmp3,$src2\n\t" 5483 "vpmulld $tmp3,$tmp3,$src2\n\t" 5484 "vextracti128_high $tmp,$tmp3\n\t" 5485 "vpmulld $tmp,$tmp,$src2\n\t" 5486 "pshufd $tmp2,$tmp,0xE\n\t" 5487 "vpmulld $tmp,$tmp,$tmp2\n\t" 5488 "pshufd $tmp2,$tmp,0x1\n\t" 5489 "vpmulld $tmp,$tmp,$tmp2\n\t" 5490 "movd $tmp2,$src1\n\t" 5491 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5492 "movd $dst,$tmp2\t! 
mul reduction16I" %} 5493 ins_encode %{ 5494 __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); 5495 __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 5496 __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); 5497 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 5498 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5499 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5500 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5501 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5502 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5503 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5504 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5505 %} 5506 ins_pipe( pipe_slow ); 5507 %} 5508 5509 #ifdef _LP64 5510 instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ 5511 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5512 match(Set dst (MulReductionVL src1 src2)); 5513 effect(TEMP tmp, TEMP tmp2); 5514 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5515 "vpmullq $tmp,$src2,$tmp2\n\t" 5516 "movdq $tmp2,$src1\n\t" 5517 "vpmullq $tmp2,$tmp,$tmp2\n\t" 5518 "movdq $dst,$tmp2\t! mul reduction2L" %} 5519 ins_encode %{ 5520 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5521 __ vpmullq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 5522 __ movdq($tmp2$$XMMRegister, $src1$$Register); 5523 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5524 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5525 %} 5526 ins_pipe( pipe_slow ); 5527 %} 5528 5529 instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ 5530 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5531 match(Set dst (MulReductionVL src1 src2)); 5532 effect(TEMP tmp, TEMP tmp2); 5533 format %{ "vextracti128_high $tmp,$src2\n\t" 5534 "vpmullq $tmp2,$tmp,$src2\n\t" 5535 "pshufd $tmp,$tmp2,0xE\n\t" 5536 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5537 "movdq $tmp,$src1\n\t" 5538 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5539 "movdq $dst,$tmp2\t! mul reduction4L" %} 5540 ins_encode %{ 5541 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5542 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 5543 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5544 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5545 __ movdq($tmp$$XMMRegister, $src1$$Register); 5546 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5547 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5548 %} 5549 ins_pipe( pipe_slow ); 5550 %} 5551 5552 instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ 5553 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5554 match(Set dst (MulReductionVL src1 src2)); 5555 effect(TEMP tmp, TEMP tmp2); 5556 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 5557 "vpmullq $tmp2,$tmp2,$src2\n\t" 5558 "vextracti128_high $tmp,$tmp2\n\t" 5559 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5560 "pshufd $tmp,$tmp2,0xE\n\t" 5561 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5562 "movdq $tmp,$src1\n\t" 5563 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5564 "movdq $dst,$tmp2\t! 
#ifdef _LP64
instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd   $tmp2,$src2,0xE\n\t"
            "vpmullq  $tmp,$src2,$tmp2\n\t"
            "movdq    $tmp2,$src1\n\t"
            "vpmullq  $tmp2,$tmp,$tmp2\n\t"
            "movdq    $dst,$tmp2\t! mul reduction2L" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpmullq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdq($tmp2$$XMMRegister, $src1$$Register);
    __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti128_high  $tmp,$src2\n\t"
            "vpmullq  $tmp2,$tmp,$src2\n\t"
            "pshufd   $tmp,$tmp2,0xE\n\t"
            "vpmullq  $tmp2,$tmp2,$tmp\n\t"
            "movdq    $tmp,$src1\n\t"
            "vpmullq  $tmp2,$tmp2,$tmp\n\t"
            "movdq    $dst,$tmp2\t! mul reduction4L" %}
  ins_encode %{
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti64x4_high  $tmp2,$src2\n\t"
            "vpmullq  $tmp2,$tmp2,$src2\n\t"
            "vextracti128_high  $tmp,$tmp2\n\t"
            "vpmullq  $tmp2,$tmp2,$tmp\n\t"
            "pshufd   $tmp,$tmp2,0xE\n\t"
            "vpmullq  $tmp2,$tmp2,$tmp\n\t"
            "movdq    $tmp,$src1\n\t"
            "vpmullq  $tmp2,$tmp2,$tmp\n\t"
            "movdq    $dst,$tmp2\t! mul reduction8L" %}
  ins_encode %{
    __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif
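
// Unlike the integer reductions above, the floating point reductions below
// fold the vector elements into the scalar accumulator one at a time, in
// element order. Floating point multiplication is not associative, so a
// pairwise tree could give a different result than the sequential Java code
// the reduction node stands for.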
instruct rsmul2F_reduction(regF dst, vecD src2, regF tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "mulss   $dst,$src2\n\t"
            "pshufd  $tmp,$src2,0x01\n\t"
            "mulss   $dst,$tmp\t! mul reduction2F" %}
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul2F_reduction_reg(regF dst, vecD src2, regF tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vmulss  $dst,$dst,$src2\n\t"
            "pshufd  $tmp,$src2,0x01\n\t"
            "vmulss  $dst,$dst,$tmp\t! mul reduction2F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "mulss   $dst,$src2\n\t"
            "pshufd  $tmp,$src2,0x01\n\t"
            "mulss   $dst,$tmp\n\t"
            "pshufd  $tmp,$src2,0x02\n\t"
            "mulss   $dst,$tmp\n\t"
            "pshufd  $tmp,$src2,0x03\n\t"
            "mulss   $dst,$tmp\t! mul reduction4F" %}
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vmulss  $dst,$dst,$src2\n\t"
            "pshufd  $tmp,$src2,0x01\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$src2,0x02\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$src2,0x03\n\t"
            "vmulss  $dst,$dst,$tmp\t! mul reduction4F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
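
// For the 256-bit and 512-bit float reductions below, vextractf128_high
// (or vextractf32x4 with an explicit lane index) copies the next 128-bit
// lane of $src2 into a temporary, which is then reduced with the same
// four-element pshufd/vmulss sequence as a 4F vector.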
instruct rvmul8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulss  $dst,$dst,$src2\n\t"
            "pshufd  $tmp,$src2,0x01\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$src2,0x02\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$src2,0x03\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "vextractf128_high  $tmp2,$src2\n\t"
            "vmulss  $dst,$dst,$tmp2\n\t"
            "pshufd  $tmp,$tmp2,0x01\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x02\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x03\n\t"
            "vmulss  $dst,$dst,$tmp\t! mul reduction8F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulss  $dst,$dst,$src2\n\t"
            "pshufd  $tmp,$src2,0x01\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$src2,0x02\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$src2,0x03\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "vextractf32x4  $tmp2,$src2,0x1\n\t"
            "vmulss  $dst,$dst,$tmp2\n\t"
            "pshufd  $tmp,$tmp2,0x01\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x02\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x03\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "vextractf32x4  $tmp2,$src2,0x2\n\t"
            "vmulss  $dst,$dst,$tmp2\n\t"
            "pshufd  $tmp,$tmp2,0x01\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x02\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x03\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "vextractf32x4  $tmp2,$src2,0x3\n\t"
            "vmulss  $dst,$dst,$tmp2\n\t"
            "pshufd  $tmp,$tmp2,0x01\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x02\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x03\n\t"
            "vmulss  $dst,$dst,$tmp\t! mul reduction16F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
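
// For doubles, pshufd 0xE copies the upper 64 bits of the source (the odd
// element, viewed as four ints) into the low half of $tmp, where the scalar
// mulsd/vmulsd can reach it.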
instruct rsmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "mulsd   $dst,$src2\n\t"
            "pshufd  $tmp,$src2,0xE\n\t"
            "mulsd   $dst,$tmp\t! mul reduction2D" %}
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vmulsd  $dst,$dst,$src2\n\t"
            "pshufd  $tmp,$src2,0xE\n\t"
            "vmulsd  $dst,$dst,$tmp\t! mul reduction2D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulsd  $dst,$dst,$src2\n\t"
            "pshufd  $tmp,$src2,0xE\n\t"
            "vmulsd  $dst,$dst,$tmp\n\t"
            "vextractf128_high  $tmp2,$src2\n\t"
            "vmulsd  $dst,$dst,$tmp2\n\t"
            "pshufd  $tmp,$tmp2,0xE\n\t"
            "vmulsd  $dst,$dst,$tmp\t! mul reduction4D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulsd  $dst,$dst,$src2\n\t"
            "pshufd  $tmp,$src2,0xE\n\t"
            "vmulsd  $dst,$dst,$tmp\n\t"
            "vextractf32x4  $tmp2,$src2,0x1\n\t"
            "vmulsd  $dst,$dst,$tmp2\n\t"
            "pshufd  $tmp,$tmp2,0xE\n\t"
            "vmulsd  $dst,$dst,$tmp\n\t"
            "vextractf32x4  $tmp2,$src2,0x2\n\t"
            "vmulsd  $dst,$dst,$tmp2\n\t"
            "pshufd  $tmp,$tmp2,0xE\n\t"
            "vmulsd  $dst,$dst,$tmp\n\t"
            "vextractf32x4  $tmp2,$src2,0x3\n\t"
            "vmulsd  $dst,$dst,$tmp2\n\t"
            "pshufd  $tmp,$tmp2,0xE\n\t"
            "vmulsd  $dst,$dst,$tmp\t! mul reduction8D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// ====================VECTOR ARITHMETIC=======================================

// --------------------------------- ADD --------------------------------------
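
// For the byte and short element types, three mutually exclusive predicate
// families keep the matcher unambiguous:
//   *_avx          - VM_Version::supports_avxonly():    AVX1/AVX2, no AVX-512
//   *_evex         - VM_Version::supports_avx512bw():   AVX-512 with BW
//   *_evex_special - VM_Version::supports_avx512nobw(): AVX-512 without BW,
//                    where the byte/word instructions have no EVEX encoding
//                    and a destructive pattern (dst is also the first input)
//                    is matched instead.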
// Bytes vector add
instruct vadd4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst src));
  format %{ "paddb  $dst,$src\t! add packed4B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb  $dst,$dst,$src2\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb  $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst src));
  format %{ "paddb  $dst,$src\t! add packed8B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb  $dst,$dst,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb  $dst,$src,$mem\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst src));
  format %{ "paddb  $dst,$src\t! add packed16B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb  $dst,$dst,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb  $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb  $dst,$dst,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb  $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector add
instruct vadd2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst src));
  format %{ "paddw  $dst,$src\t! add packed2S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw  $dst,$src1,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw  $dst,$src1,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw  $dst,$dst,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw  $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw  $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw  $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst src));
  format %{ "paddw  $dst,$src\t! add packed4S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw  $dst,$src1,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw  $dst,$src1,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw  $dst,$dst,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw  $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw  $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw  $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src));
  format %{ "paddw  $dst,$src\t! add packed8S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw  $dst,$src1,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw  $dst,$src1,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw  $dst,$dst,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw  $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw  $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw  $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw  $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw  $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw  $dst,$dst,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw  $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw  $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw  $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw  $dst,$src1,$src2\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw  $dst,$src,$mem\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
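
// Integer, long, float and double element types do not depend on the BW
// extension, so the patterns below discriminate only on the UseAVX level:
// SSE forms for UseAVX == 0, AVX forms for UseAVX > 0 (UseAVX > 1 for the
// 256-bit integer ops, which need AVX2), and 512-bit forms for UseAVX > 2.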
// Integers vector add
instruct vadd2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst src));
  format %{ "paddd  $dst,$src\t! add packed2I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd  $dst,$src1,$src2\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd  $dst,$src,$mem\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst src));
  format %{ "paddd  $dst,$src\t! add packed4I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd  $dst,$src1,$src2\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd  $dst,$src,$mem\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd  $dst,$src1,$src2\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd  $dst,$src,$mem\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd  $dst,$src1,$src2\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd  $dst,$src,$mem\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector add
instruct vadd2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL dst src));
  format %{ "paddq  $dst,$src\t! add packed2L" %}
  ins_encode %{
    __ paddq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq  $dst,$src1,$src2\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq  $dst,$src,$mem\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq  $dst,$src1,$src2\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq  $dst,$src,$mem\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq  $dst,$src1,$src2\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq  $dst,$src,$mem\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector add
instruct vadd2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF dst src));
  format %{ "addps  $dst,$src\t! add packed2F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps  $dst,$src1,$src2\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps  $dst,$src,$mem\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF dst src));
  format %{ "addps  $dst,$src\t! add packed4F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps  $dst,$src1,$src2\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps  $dst,$src,$mem\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps  $dst,$src1,$src2\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps  $dst,$src,$mem\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps  $dst,$src1,$src2\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps  $dst,$src,$mem\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector add
instruct vadd2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD dst src));
  format %{ "addpd  $dst,$src\t! add packed2D" %}
  ins_encode %{
    __ addpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd  $dst,$src1,$src2\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd  $dst,$src,$mem\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd  $dst,$src1,$src2\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd  $dst,$src,$mem\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd  $dst,$src1,$src2\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd  $dst,$src,$mem\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- SUB --------------------------------------
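
// The subtract patterns below mirror the add patterns above, with
// psubb/vpsubb and friends in place of paddb/vpaddb, and use the same three
// predicate families for the byte and short element types.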
sub packed8B" %} 6969 ins_encode %{ 6970 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 6971 %} 6972 ins_pipe( pipe_slow ); 6973 %} 6974 6975 instruct vsub8B_reg_avx(vecD dst, vecD src1, vecD src2) %{ 6976 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 6977 match(Set dst (SubVB src1 src2)); 6978 format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %} 6979 ins_encode %{ 6980 int vector_len = 0; 6981 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6982 %} 6983 ins_pipe( pipe_slow ); 6984 %} 6985 6986 instruct vsub8B_reg_evex(vecD dst, vecD src1, vecD src2) %{ 6987 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 6988 match(Set dst (SubVB src1 src2)); 6989 format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %} 6990 ins_encode %{ 6991 int vector_len = 0; 6992 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6993 %} 6994 ins_pipe( pipe_slow ); 6995 %} 6996 6997 instruct vsub8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ 6998 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 6999 match(Set dst (SubVB dst src2)); 7000 effect(TEMP src1); 7001 format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %} 7002 ins_encode %{ 7003 int vector_len = 0; 7004 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7005 %} 7006 ins_pipe( pipe_slow ); 7007 %} 7008 7009 instruct vsub8B_mem_avx(vecD dst, vecD src, memory mem) %{ 7010 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 7011 match(Set dst (SubVB src (LoadVector mem))); 7012 format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %} 7013 ins_encode %{ 7014 int vector_len = 0; 7015 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7016 %} 7017 ins_pipe( pipe_slow ); 7018 %} 7019 7020 instruct vsub8B_mem_evex(vecD dst, vecD src, memory mem) %{ 7021 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 7022 match(Set dst (SubVB src (LoadVector mem))); 7023 format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %} 7024 ins_encode %{ 7025 int vector_len = 0; 7026 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7027 %} 7028 ins_pipe( pipe_slow ); 7029 %} 7030 7031 instruct vsub8B_mem_evex_special(vecD dst, vecD src, memory mem) %{ 7032 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 7033 match(Set dst (SubVB dst (LoadVector mem))); 7034 effect(TEMP src); 7035 format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %} 7036 ins_encode %{ 7037 int vector_len = 0; 7038 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7039 %} 7040 ins_pipe( pipe_slow ); 7041 %} 7042 7043 instruct vsub16B(vecX dst, vecX src) %{ 7044 predicate(UseAVX == 0 && n->as_Vector()->length() == 16); 7045 match(Set dst (SubVB dst src)); 7046 format %{ "psubb $dst,$src\t! sub packed16B" %} 7047 ins_encode %{ 7048 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 7049 %} 7050 ins_pipe( pipe_slow ); 7051 %} 7052 7053 instruct vsub16B_reg_avx(vecX dst, vecX src1, vecX src2) %{ 7054 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16); 7055 match(Set dst (SubVB src1 src2)); 7056 format %{ "vpsubb $dst,$src1,$src2\t! 

instruct vsub16B_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
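
// Note: EVEX-encoded byte and word arithmetic requires the AVX512BW
// extension, which is why the 512-bit byte forms above exist only under
// supports_avx512bw() and have no avx512nobw fallback at this width.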

// Shorts/Chars vector sub
instruct vsub2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed2S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst src2));
  effect(TEMP src1);
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed4S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst src2));
  effect(TEMP src1);
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed8S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst src2));
  effect(TEMP src1);
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS dst src2));
  effect(TEMP src1);
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector sub
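// Dword and qword vector arithmetic is part of base AVX-512 (AVX512F), so
// the int/long rules need no avx512bw/avx512nobw split - a single UseAVX
// level check per size is enough.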
instruct vsub2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed2I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed4I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector sub
instruct vsub2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL dst src));
  format %{ "psubq $dst,$src\t! sub packed2L" %}
  ins_encode %{
    __ psubq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector sub
instruct vsub2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed2F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed4F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector sub
instruct vsub2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVD dst src));
  format %{ "subpd $dst,$src\t! sub packed2D" %}
  ins_encode %{
    __ subpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- MUL --------------------------------------
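
// Note: pmullw/vpmullw keep only the low 16 bits of each 16x16-bit product,
// which matches Java's wrap-around semantics for short/char multiplication.
// The _avx/_evex/_evex_special variant scheme mirrors the SUB rules above.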

// Shorts/Chars vector mul
instruct vmul2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed2S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS dst src2));
  effect(TEMP src1);
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed4S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS dst src2));
  effect(TEMP src1);
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed8S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS dst src2));
  effect(TEMP src1);
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS dst src2));
  effect(TEMP src1);
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector mul (sse4_1)
instruct vmul2I(vecD dst, vecD src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed2I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I(vecX dst, vecX src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed4I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
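
// Long vector multiply relies on vpmullq, an AVX512DQ instruction - hence
// the supports_avx512dq() check on every MulVL rule below; SSE and AVX2
// provide no packed 64x64->64-bit multiply.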

instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector mul
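// Packed FP multiply (and the div rules further below) are AVX1 operations
// up to 256 bits, so UseAVX > 0 suffices for vecD/vecX/vecY; only the
// 512-bit vecZ forms require UseAVX > 2 (AVX512F).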
instruct vmul2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed2F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed4F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector mul
instruct vmul2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD dst src));
  format %{ "mulpd $dst,$src\t! mul packed2D" %}
  ins_encode %{
    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmov4D_reg(vecY dst, vecY src1, vecY src2, immI8 cop, cmpOp_vcmppd copnd) %{
  predicate(UseAVX > 0 && UseAVX < 3 && n->as_Vector()->length() == 4);
  match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2)));
  effect(TEMP dst, USE src1, USE src2);
  format %{ "cmppd.$copnd $dst, $src1, $src2 ! vcmovevd, cond=$cop\n\t"
            "vpblendd $dst,$src1,$src2,$dst ! vcmovevd\n\t"
         %}
  ins_encode %{
    int vector_len = 1;
    int cond = (Assembler::Condition)($copnd$$cmpcode);
    __ cmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len);
    __ vpblendd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
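
// The CMoveVD rule above is a two-step vector conditional move: cmppd
// leaves an all-ones/all-zeros mask in $dst for each double lane, and the
// register-masked blend then selects each lane from $src2 where the mask
// is set and from $src1 where it is clear.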
// --------------------------------- DIV --------------------------------------

// Floats vector div
instruct vdiv2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed2F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed4F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector div
instruct vdiv2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD dst src));
  format %{ "divpd $dst,$src\t!
div packed2D" %} 8728 ins_encode %{ 8729 __ divpd($dst$$XMMRegister, $src$$XMMRegister); 8730 %} 8731 ins_pipe( pipe_slow ); 8732 %} 8733 8734 instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{ 8735 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8736 match(Set dst (DivVD src1 src2)); 8737 format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %} 8738 ins_encode %{ 8739 int vector_len = 0; 8740 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8741 %} 8742 ins_pipe( pipe_slow ); 8743 %} 8744 8745 instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{ 8746 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8747 match(Set dst (DivVD src (LoadVector mem))); 8748 format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %} 8749 ins_encode %{ 8750 int vector_len = 0; 8751 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8752 %} 8753 ins_pipe( pipe_slow ); 8754 %} 8755 8756 instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{ 8757 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8758 match(Set dst (DivVD src1 src2)); 8759 format %{ "vdivpd $dst,$src1,$src2\t! div packed4D" %} 8760 ins_encode %{ 8761 int vector_len = 1; 8762 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8763 %} 8764 ins_pipe( pipe_slow ); 8765 %} 8766 8767 instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{ 8768 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8769 match(Set dst (DivVD src (LoadVector mem))); 8770 format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %} 8771 ins_encode %{ 8772 int vector_len = 1; 8773 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8774 %} 8775 ins_pipe( pipe_slow ); 8776 %} 8777 8778 instruct vdiv8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ 8779 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8780 match(Set dst (DivVD src1 src2)); 8781 format %{ "vdivpd $dst,$src1,$src2\t! div packed8D" %} 8782 ins_encode %{ 8783 int vector_len = 2; 8784 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8785 %} 8786 ins_pipe( pipe_slow ); 8787 %} 8788 8789 instruct vdiv8D_mem(vecZ dst, vecZ src, memory mem) %{ 8790 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8791 match(Set dst (DivVD src (LoadVector mem))); 8792 format %{ "vdivpd $dst,$src,$mem\t! div packed8D" %} 8793 ins_encode %{ 8794 int vector_len = 2; 8795 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8796 %} 8797 ins_pipe( pipe_slow ); 8798 %} 8799 8800 // ------------------------------ Shift --------------------------------------- 8801 8802 // Left and right shift count vectors are the same on x86 8803 // (only lowest bits of xmm reg are used for count). 8804 instruct vshiftcnt(vecS dst, rRegI cnt) %{ 8805 match(Set dst (LShiftCntV cnt)); 8806 match(Set dst (RShiftCntV cnt)); 8807 format %{ "movd $dst,$cnt\t! load shift count" %} 8808 ins_encode %{ 8809 __ movdl($dst$$XMMRegister, $cnt$$Register); 8810 %} 8811 ins_pipe( pipe_slow ); 8812 %} 8813 8814 // --------------------------------- Sqrt -------------------------------------- 8815 8816 // Floating point vector sqrt - double precision only 8817 instruct vsqrt2D_reg(vecX dst, vecX src) %{ 8818 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8819 match(Set dst (SqrtVD src)); 8820 format %{ "vsqrtpd $dst,$src\t! 
sqrt packed2D" %} 8821 ins_encode %{ 8822 int vector_len = 0; 8823 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8824 %} 8825 ins_pipe( pipe_slow ); 8826 %} 8827 8828 instruct vsqrt2D_mem(vecX dst, memory mem) %{ 8829 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8830 match(Set dst (SqrtVD (LoadVector mem))); 8831 format %{ "vsqrtpd $dst,$mem\t! sqrt packed2D" %} 8832 ins_encode %{ 8833 int vector_len = 0; 8834 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); 8835 %} 8836 ins_pipe( pipe_slow ); 8837 %} 8838 8839 instruct vsqrt4D_reg(vecY dst, vecY src) %{ 8840 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8841 match(Set dst (SqrtVD src)); 8842 format %{ "vsqrtpd $dst,$src\t! sqrt packed4D" %} 8843 ins_encode %{ 8844 int vector_len = 1; 8845 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8846 %} 8847 ins_pipe( pipe_slow ); 8848 %} 8849 8850 instruct vsqrt4D_mem(vecY dst, memory mem) %{ 8851 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8852 match(Set dst (SqrtVD (LoadVector mem))); 8853 format %{ "vsqrtpd $dst,$mem\t! sqrt packed4D" %} 8854 ins_encode %{ 8855 int vector_len = 1; 8856 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); 8857 %} 8858 ins_pipe( pipe_slow ); 8859 %} 8860 8861 instruct vsqrt8D_reg(vecZ dst, vecZ src) %{ 8862 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8863 match(Set dst (SqrtVD src)); 8864 format %{ "vsqrtpd $dst,$src\t! sqrt packed8D" %} 8865 ins_encode %{ 8866 int vector_len = 2; 8867 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8868 %} 8869 ins_pipe( pipe_slow ); 8870 %} 8871 8872 instruct vsqrt8D_mem(vecZ dst, memory mem) %{ 8873 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8874 match(Set dst (SqrtVD (LoadVector mem))); 8875 format %{ "vsqrtpd $dst,$mem\t! sqrt packed8D" %} 8876 ins_encode %{ 8877 int vector_len = 2; 8878 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); 8879 %} 8880 ins_pipe( pipe_slow ); 8881 %} 8882 8883 // ------------------------------ LeftShift ----------------------------------- 8884 8885 // Shorts/Chars vector left shift 8886 instruct vsll2S(vecS dst, vecS shift) %{ 8887 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8888 match(Set dst (LShiftVS dst shift)); 8889 format %{ "psllw $dst,$shift\t! left shift packed2S" %} 8890 ins_encode %{ 8891 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 8892 %} 8893 ins_pipe( pipe_slow ); 8894 %} 8895 8896 instruct vsll2S_imm(vecS dst, immI8 shift) %{ 8897 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8898 match(Set dst (LShiftVS dst shift)); 8899 format %{ "psllw $dst,$shift\t! left shift packed2S" %} 8900 ins_encode %{ 8901 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 8902 %} 8903 ins_pipe( pipe_slow ); 8904 %} 8905 8906 instruct vsll2S_reg_avx(vecS dst, vecS src, vecS shift) %{ 8907 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 8908 match(Set dst (LShiftVS src shift)); 8909 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 8910 ins_encode %{ 8911 int vector_len = 0; 8912 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8913 %} 8914 ins_pipe( pipe_slow ); 8915 %} 8916 8917 instruct vsll2S_reg_evex(vecS dst, vecS src, vecS shift) %{ 8918 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 8919 match(Set dst (LShiftVS src shift)); 8920 format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed2S" %} 8921 ins_encode %{ 8922 int vector_len = 0; 8923 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8924 %} 8925 ins_pipe( pipe_slow ); 8926 %} 8927 8928 instruct vsll2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{ 8929 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 8930 match(Set dst (LShiftVS dst shift)); 8931 effect(TEMP src); 8932 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 8933 ins_encode %{ 8934 int vector_len = 0; 8935 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8936 %} 8937 ins_pipe( pipe_slow ); 8938 %} 8939 8940 instruct vsll2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{ 8941 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 8942 match(Set dst (LShiftVS src shift)); 8943 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 8944 ins_encode %{ 8945 int vector_len = 0; 8946 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8947 %} 8948 ins_pipe( pipe_slow ); 8949 %} 8950 8951 instruct vsll2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{ 8952 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 8953 match(Set dst (LShiftVS src shift)); 8954 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 8955 ins_encode %{ 8956 int vector_len = 0; 8957 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8958 %} 8959 ins_pipe( pipe_slow ); 8960 %} 8961 8962 instruct vsll2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{ 8963 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 8964 match(Set dst (LShiftVS dst shift)); 8965 effect(TEMP src); 8966 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 8967 ins_encode %{ 8968 int vector_len = 0; 8969 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8970 %} 8971 ins_pipe( pipe_slow ); 8972 %} 8973 8974 instruct vsll4S(vecD dst, vecS shift) %{ 8975 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 8976 match(Set dst (LShiftVS dst shift)); 8977 format %{ "psllw $dst,$shift\t! left shift packed4S" %} 8978 ins_encode %{ 8979 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 8980 %} 8981 ins_pipe( pipe_slow ); 8982 %} 8983 8984 instruct vsll4S_imm(vecD dst, immI8 shift) %{ 8985 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 8986 match(Set dst (LShiftVS dst shift)); 8987 format %{ "psllw $dst,$shift\t! left shift packed4S" %} 8988 ins_encode %{ 8989 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 8990 %} 8991 ins_pipe( pipe_slow ); 8992 %} 8993 8994 instruct vsll4S_reg_avx(vecD dst, vecD src, vecS shift) %{ 8995 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 8996 match(Set dst (LShiftVS src shift)); 8997 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 8998 ins_encode %{ 8999 int vector_len = 0; 9000 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9001 %} 9002 ins_pipe( pipe_slow ); 9003 %} 9004 9005 instruct vsll4S_reg_evex(vecD dst, vecD src, vecS shift) %{ 9006 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 9007 match(Set dst (LShiftVS src shift)); 9008 format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed4S" %} 9009 ins_encode %{ 9010 int vector_len = 0; 9011 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9012 %} 9013 ins_pipe( pipe_slow ); 9014 %} 9015 9016 instruct vsll4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{ 9017 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 9018 match(Set dst (LShiftVS dst shift)); 9019 effect(TEMP src); 9020 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 9021 ins_encode %{ 9022 int vector_len = 0; 9023 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9024 %} 9025 ins_pipe( pipe_slow ); 9026 %} 9027 9028 instruct vsll4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{ 9029 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 9030 match(Set dst (LShiftVS src shift)); 9031 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 9032 ins_encode %{ 9033 int vector_len = 0; 9034 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9035 %} 9036 ins_pipe( pipe_slow ); 9037 %} 9038 9039 instruct vsll4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{ 9040 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 9041 match(Set dst (LShiftVS src shift)); 9042 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 9043 ins_encode %{ 9044 int vector_len = 0; 9045 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9046 %} 9047 ins_pipe( pipe_slow ); 9048 %} 9049 9050 instruct vsll4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{ 9051 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 9052 match(Set dst (LShiftVS dst shift)); 9053 effect(TEMP src); 9054 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 9055 ins_encode %{ 9056 int vector_len = 0; 9057 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9058 %} 9059 ins_pipe( pipe_slow ); 9060 %} 9061 9062 instruct vsll8S(vecX dst, vecS shift) %{ 9063 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 9064 match(Set dst (LShiftVS dst shift)); 9065 format %{ "psllw $dst,$shift\t! left shift packed8S" %} 9066 ins_encode %{ 9067 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 9068 %} 9069 ins_pipe( pipe_slow ); 9070 %} 9071 9072 instruct vsll8S_imm(vecX dst, immI8 shift) %{ 9073 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 9074 match(Set dst (LShiftVS dst shift)); 9075 format %{ "psllw $dst,$shift\t! left shift packed8S" %} 9076 ins_encode %{ 9077 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 9078 %} 9079 ins_pipe( pipe_slow ); 9080 %} 9081 9082 instruct vsll8S_reg_avx(vecX dst, vecX src, vecS shift) %{ 9083 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 9084 match(Set dst (LShiftVS src shift)); 9085 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 9086 ins_encode %{ 9087 int vector_len = 0; 9088 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9089 %} 9090 ins_pipe( pipe_slow ); 9091 %} 9092 9093 instruct vsll8S_reg_evex(vecX dst, vecX src, vecS shift) %{ 9094 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 9095 match(Set dst (LShiftVS src shift)); 9096 format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed8S" %} 9097 ins_encode %{ 9098 int vector_len = 0; 9099 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9100 %} 9101 ins_pipe( pipe_slow ); 9102 %} 9103 9104 instruct vsll8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{ 9105 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 9106 match(Set dst (LShiftVS dst shift)); 9107 effect(TEMP src); 9108 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 9109 ins_encode %{ 9110 int vector_len = 0; 9111 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9112 %} 9113 ins_pipe( pipe_slow ); 9114 %} 9115 9116 instruct vsll8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{ 9117 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 9118 match(Set dst (LShiftVS src shift)); 9119 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 9120 ins_encode %{ 9121 int vector_len = 0; 9122 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9123 %} 9124 ins_pipe( pipe_slow ); 9125 %} 9126 9127 instruct vsll8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{ 9128 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 9129 match(Set dst (LShiftVS src shift)); 9130 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 9131 ins_encode %{ 9132 int vector_len = 0; 9133 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9134 %} 9135 ins_pipe( pipe_slow ); 9136 %} 9137 9138 instruct vsll8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{ 9139 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 9140 match(Set dst (LShiftVS dst shift)); 9141 effect(TEMP src); 9142 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 9143 ins_encode %{ 9144 int vector_len = 0; 9145 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9146 %} 9147 ins_pipe( pipe_slow ); 9148 %} 9149 9150 instruct vsll16S_reg_avx(vecY dst, vecY src, vecS shift) %{ 9151 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 9152 match(Set dst (LShiftVS src shift)); 9153 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 9154 ins_encode %{ 9155 int vector_len = 1; 9156 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9157 %} 9158 ins_pipe( pipe_slow ); 9159 %} 9160 9161 instruct vsll16S_reg_evex(vecY dst, vecY src, vecS shift) %{ 9162 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 9163 match(Set dst (LShiftVS src shift)); 9164 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 9165 ins_encode %{ 9166 int vector_len = 1; 9167 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9168 %} 9169 ins_pipe( pipe_slow ); 9170 %} 9171 9172 instruct vsll16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{ 9173 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 9174 match(Set dst (LShiftVS dst shift)); 9175 effect(TEMP src); 9176 format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed16S" %} 9177 ins_encode %{ 9178 int vector_len = 1; 9179 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9180 %} 9181 ins_pipe( pipe_slow ); 9182 %} 9183 9184 instruct vsll16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{ 9185 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 9186 match(Set dst (LShiftVS src shift)); 9187 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 9188 ins_encode %{ 9189 int vector_len = 1; 9190 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9191 %} 9192 ins_pipe( pipe_slow ); 9193 %} 9194 9195 instruct vsll16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{ 9196 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 9197 match(Set dst (LShiftVS src shift)); 9198 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 9199 ins_encode %{ 9200 int vector_len = 1; 9201 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9202 %} 9203 ins_pipe( pipe_slow ); 9204 %} 9205 9206 instruct vsll16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{ 9207 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 9208 match(Set dst (LShiftVS dst shift)); 9209 effect(TEMP src); 9210 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 9211 ins_encode %{ 9212 int vector_len = 1; 9213 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9214 %} 9215 ins_pipe( pipe_slow ); 9216 %} 9217 9218 instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{ 9219 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 9220 match(Set dst (LShiftVS src shift)); 9221 format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %} 9222 ins_encode %{ 9223 int vector_len = 2; 9224 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9225 %} 9226 ins_pipe( pipe_slow ); 9227 %} 9228 9229 instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 9230 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 9231 match(Set dst (LShiftVS src shift)); 9232 format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %} 9233 ins_encode %{ 9234 int vector_len = 2; 9235 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9236 %} 9237 ins_pipe( pipe_slow ); 9238 %} 9239 9240 // Integers vector left shift 9241 instruct vsll2I(vecD dst, vecS shift) %{ 9242 predicate(n->as_Vector()->length() == 2); 9243 match(Set dst (LShiftVI dst shift)); 9244 format %{ "pslld $dst,$shift\t! left shift packed2I" %} 9245 ins_encode %{ 9246 __ pslld($dst$$XMMRegister, $shift$$XMMRegister); 9247 %} 9248 ins_pipe( pipe_slow ); 9249 %} 9250 9251 instruct vsll2I_imm(vecD dst, immI8 shift) %{ 9252 predicate(n->as_Vector()->length() == 2); 9253 match(Set dst (LShiftVI dst shift)); 9254 format %{ "pslld $dst,$shift\t! left shift packed2I" %} 9255 ins_encode %{ 9256 __ pslld($dst$$XMMRegister, (int)$shift$$constant); 9257 %} 9258 ins_pipe( pipe_slow ); 9259 %} 9260 9261 instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{ 9262 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9263 match(Set dst (LShiftVI src shift)); 9264 format %{ "vpslld $dst,$src,$shift\t! 
left shift packed2I" %} 9265 ins_encode %{ 9266 int vector_len = 0; 9267 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9268 %} 9269 ins_pipe( pipe_slow ); 9270 %} 9271 9272 instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 9273 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9274 match(Set dst (LShiftVI src shift)); 9275 format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %} 9276 ins_encode %{ 9277 int vector_len = 0; 9278 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9279 %} 9280 ins_pipe( pipe_slow ); 9281 %} 9282 9283 instruct vsll4I(vecX dst, vecS shift) %{ 9284 predicate(n->as_Vector()->length() == 4); 9285 match(Set dst (LShiftVI dst shift)); 9286 format %{ "pslld $dst,$shift\t! left shift packed4I" %} 9287 ins_encode %{ 9288 __ pslld($dst$$XMMRegister, $shift$$XMMRegister); 9289 %} 9290 ins_pipe( pipe_slow ); 9291 %} 9292 9293 instruct vsll4I_imm(vecX dst, immI8 shift) %{ 9294 predicate(n->as_Vector()->length() == 4); 9295 match(Set dst (LShiftVI dst shift)); 9296 format %{ "pslld $dst,$shift\t! left shift packed4I" %} 9297 ins_encode %{ 9298 __ pslld($dst$$XMMRegister, (int)$shift$$constant); 9299 %} 9300 ins_pipe( pipe_slow ); 9301 %} 9302 9303 instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{ 9304 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 9305 match(Set dst (LShiftVI src shift)); 9306 format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %} 9307 ins_encode %{ 9308 int vector_len = 0; 9309 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9310 %} 9311 ins_pipe( pipe_slow ); 9312 %} 9313 9314 instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 9315 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 9316 match(Set dst (LShiftVI src shift)); 9317 format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %} 9318 ins_encode %{ 9319 int vector_len = 0; 9320 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9321 %} 9322 ins_pipe( pipe_slow ); 9323 %} 9324 9325 instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{ 9326 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 9327 match(Set dst (LShiftVI src shift)); 9328 format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} 9329 ins_encode %{ 9330 int vector_len = 1; 9331 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9332 %} 9333 ins_pipe( pipe_slow ); 9334 %} 9335 9336 instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 9337 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 9338 match(Set dst (LShiftVI src shift)); 9339 format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} 9340 ins_encode %{ 9341 int vector_len = 1; 9342 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9343 %} 9344 ins_pipe( pipe_slow ); 9345 %} 9346 9347 instruct vsll16I_reg(vecZ dst, vecZ src, vecS shift) %{ 9348 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 9349 match(Set dst (LShiftVI src shift)); 9350 format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %} 9351 ins_encode %{ 9352 int vector_len = 2; 9353 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9354 %} 9355 ins_pipe( pipe_slow ); 9356 %} 9357 9358 instruct vsll16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 9359 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 9360 match(Set dst (LShiftVI src shift)); 9361 format %{ "vpslld $dst,$src,$shift\t! 
left shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector left shift
instruct vsll2L(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8L_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ----------------------- LogicalRightShift -----------------------------------

// Shorts vector logical right shift produces an incorrect Java result
// for negative data, because Java code converts the short value into an int
// with sign extension before the shift. But char vectors are fine, since
// chars are unsigned values.
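// A minimal, hypothetical Java illustration of the mismatch (identifiers
// invented for this note):
//
//   short s = -1;                  // bit pattern 0xFFFF
//   int   r = (s >>> 2) & 0xFFFF;  // scalar: sign-extend, shift, mask -> 0xFFFF
//   // a 16-bit psrlw of 0xFFFF by 2 would give 0x3FFF instead
//
// so URShiftVS is only generated where scalar and packed results agree,
// e.g. for char data.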
instruct vsrl2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_avx(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_evex(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t!
logical right shift packed2S" %} 9532 ins_encode %{ 9533 int vector_len = 0; 9534 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9535 %} 9536 ins_pipe( pipe_slow ); 9537 %} 9538 9539 instruct vsrl2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{ 9540 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 9541 match(Set dst (URShiftVS dst shift)); 9542 effect(TEMP src); 9543 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} 9544 ins_encode %{ 9545 int vector_len = 0; 9546 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9547 %} 9548 ins_pipe( pipe_slow ); 9549 %} 9550 9551 instruct vsrl4S(vecD dst, vecS shift) %{ 9552 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 9553 match(Set dst (URShiftVS dst shift)); 9554 format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %} 9555 ins_encode %{ 9556 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); 9557 %} 9558 ins_pipe( pipe_slow ); 9559 %} 9560 9561 instruct vsrl4S_imm(vecD dst, immI8 shift) %{ 9562 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 9563 match(Set dst (URShiftVS dst shift)); 9564 format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %} 9565 ins_encode %{ 9566 __ psrlw($dst$$XMMRegister, (int)$shift$$constant); 9567 %} 9568 ins_pipe( pipe_slow ); 9569 %} 9570 9571 instruct vsrl4S_reg_avx(vecD dst, vecD src, vecS shift) %{ 9572 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 9573 match(Set dst (URShiftVS src shift)); 9574 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 9575 ins_encode %{ 9576 int vector_len = 0; 9577 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9578 %} 9579 ins_pipe( pipe_slow ); 9580 %} 9581 9582 instruct vsrl4S_reg_evex(vecD dst, vecD src, vecS shift) %{ 9583 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 9584 match(Set dst (URShiftVS src shift)); 9585 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 9586 ins_encode %{ 9587 int vector_len = 0; 9588 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9589 %} 9590 ins_pipe( pipe_slow ); 9591 %} 9592 9593 instruct vsrl4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{ 9594 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 9595 match(Set dst (URShiftVS dst shift)); 9596 effect(TEMP src); 9597 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 9598 ins_encode %{ 9599 int vector_len = 0; 9600 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9601 %} 9602 ins_pipe( pipe_slow ); 9603 %} 9604 9605 instruct vsrl4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{ 9606 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 9607 match(Set dst (URShiftVS src shift)); 9608 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 9609 ins_encode %{ 9610 int vector_len = 0; 9611 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9612 %} 9613 ins_pipe( pipe_slow ); 9614 %} 9615 9616 instruct vsrl4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{ 9617 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 9618 match(Set dst (URShiftVS src shift)); 9619 format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed4S" %} 9620 ins_encode %{ 9621 int vector_len = 0; 9622 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9623 %} 9624 ins_pipe( pipe_slow ); 9625 %} 9626 9627 instruct vsrl4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{ 9628 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 9629 match(Set dst (URShiftVS dst shift)); 9630 effect(TEMP src); 9631 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 9632 ins_encode %{ 9633 int vector_len = 0; 9634 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9635 %} 9636 ins_pipe( pipe_slow ); 9637 %} 9638 9639 instruct vsrl8S(vecX dst, vecS shift) %{ 9640 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 9641 match(Set dst (URShiftVS dst shift)); 9642 format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} 9643 ins_encode %{ 9644 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); 9645 %} 9646 ins_pipe( pipe_slow ); 9647 %} 9648 9649 instruct vsrl8S_imm(vecX dst, immI8 shift) %{ 9650 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 9651 match(Set dst (URShiftVS dst shift)); 9652 format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} 9653 ins_encode %{ 9654 __ psrlw($dst$$XMMRegister, (int)$shift$$constant); 9655 %} 9656 ins_pipe( pipe_slow ); 9657 %} 9658 9659 instruct vsrl8S_reg_avx(vecX dst, vecX src, vecS shift) %{ 9660 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 9661 match(Set dst (URShiftVS src shift)); 9662 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 9663 ins_encode %{ 9664 int vector_len = 0; 9665 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9666 %} 9667 ins_pipe( pipe_slow ); 9668 %} 9669 9670 instruct vsrl8S_reg_evex(vecX dst, vecX src, vecS shift) %{ 9671 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 9672 match(Set dst (URShiftVS src shift)); 9673 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 9674 ins_encode %{ 9675 int vector_len = 0; 9676 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9677 %} 9678 ins_pipe( pipe_slow ); 9679 %} 9680 9681 instruct vsrl8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{ 9682 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 9683 match(Set dst (URShiftVS dst shift)); 9684 effect(TEMP src); 9685 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 9686 ins_encode %{ 9687 int vector_len = 0; 9688 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9689 %} 9690 ins_pipe( pipe_slow ); 9691 %} 9692 9693 instruct vsrl8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{ 9694 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 9695 match(Set dst (URShiftVS src shift)); 9696 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 9697 ins_encode %{ 9698 int vector_len = 0; 9699 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9700 %} 9701 ins_pipe( pipe_slow ); 9702 %} 9703 9704 instruct vsrl8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{ 9705 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 9706 match(Set dst (URShiftVS src shift)); 9707 format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed8S" %} 9708 ins_encode %{ 9709 int vector_len = 0; 9710 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9711 %} 9712 ins_pipe( pipe_slow ); 9713 %} 9714 9715 instruct vsrl8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{ 9716 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 9717 match(Set dst (URShiftVS dst shift)); 9718 effect(TEMP src); 9719 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 9720 ins_encode %{ 9721 int vector_len = 0; 9722 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9723 %} 9724 ins_pipe( pipe_slow ); 9725 %} 9726 9727 instruct vsrl16S_reg_avx(vecY dst, vecY src, vecS shift) %{ 9728 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 9729 match(Set dst (URShiftVS src shift)); 9730 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 9731 ins_encode %{ 9732 int vector_len = 1; 9733 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9734 %} 9735 ins_pipe( pipe_slow ); 9736 %} 9737 9738 instruct vsrl16S_reg_evex(vecY dst, vecY src, vecS shift) %{ 9739 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 9740 match(Set dst (URShiftVS src shift)); 9741 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 9742 ins_encode %{ 9743 int vector_len = 1; 9744 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9745 %} 9746 ins_pipe( pipe_slow ); 9747 %} 9748 9749 instruct vsrl16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{ 9750 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 9751 match(Set dst (URShiftVS dst shift)); 9752 effect(TEMP src); 9753 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 9754 ins_encode %{ 9755 int vector_len = 1; 9756 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9757 %} 9758 ins_pipe( pipe_slow ); 9759 %} 9760 9761 instruct vsrl16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{ 9762 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 9763 match(Set dst (URShiftVS src shift)); 9764 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 9765 ins_encode %{ 9766 int vector_len = 1; 9767 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9768 %} 9769 ins_pipe( pipe_slow ); 9770 %} 9771 9772 instruct vsrl16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{ 9773 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 9774 match(Set dst (URShiftVS src shift)); 9775 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 9776 ins_encode %{ 9777 int vector_len = 1; 9778 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9779 %} 9780 ins_pipe( pipe_slow ); 9781 %} 9782 9783 instruct vsrl16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{ 9784 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 9785 match(Set dst (URShiftVS dst shift)); 9786 effect(TEMP src); 9787 format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed16S" %} 9788 ins_encode %{ 9789 int vector_len = 1; 9790 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9791 %} 9792 ins_pipe( pipe_slow ); 9793 %} 9794 9795 instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{ 9796 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 9797 match(Set dst (URShiftVS src shift)); 9798 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %} 9799 ins_encode %{ 9800 int vector_len = 2; 9801 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9802 %} 9803 ins_pipe( pipe_slow ); 9804 %} 9805 9806 instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 9807 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 9808 match(Set dst (URShiftVS src shift)); 9809 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %} 9810 ins_encode %{ 9811 int vector_len = 2; 9812 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9813 %} 9814 ins_pipe( pipe_slow ); 9815 %} 9816 9817 // Integers vector logical right shift 9818 instruct vsrl2I(vecD dst, vecS shift) %{ 9819 predicate(n->as_Vector()->length() == 2); 9820 match(Set dst (URShiftVI dst shift)); 9821 format %{ "psrld $dst,$shift\t! logical right shift packed2I" %} 9822 ins_encode %{ 9823 __ psrld($dst$$XMMRegister, $shift$$XMMRegister); 9824 %} 9825 ins_pipe( pipe_slow ); 9826 %} 9827 9828 instruct vsrl2I_imm(vecD dst, immI8 shift) %{ 9829 predicate(n->as_Vector()->length() == 2); 9830 match(Set dst (URShiftVI dst shift)); 9831 format %{ "psrld $dst,$shift\t! logical right shift packed2I" %} 9832 ins_encode %{ 9833 __ psrld($dst$$XMMRegister, (int)$shift$$constant); 9834 %} 9835 ins_pipe( pipe_slow ); 9836 %} 9837 9838 instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{ 9839 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9840 match(Set dst (URShiftVI src shift)); 9841 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} 9842 ins_encode %{ 9843 int vector_len = 0; 9844 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9845 %} 9846 ins_pipe( pipe_slow ); 9847 %} 9848 9849 instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 9850 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9851 match(Set dst (URShiftVI src shift)); 9852 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} 9853 ins_encode %{ 9854 int vector_len = 0; 9855 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9856 %} 9857 ins_pipe( pipe_slow ); 9858 %} 9859 9860 instruct vsrl4I(vecX dst, vecS shift) %{ 9861 predicate(n->as_Vector()->length() == 4); 9862 match(Set dst (URShiftVI dst shift)); 9863 format %{ "psrld $dst,$shift\t! logical right shift packed4I" %} 9864 ins_encode %{ 9865 __ psrld($dst$$XMMRegister, $shift$$XMMRegister); 9866 %} 9867 ins_pipe( pipe_slow ); 9868 %} 9869 9870 instruct vsrl4I_imm(vecX dst, immI8 shift) %{ 9871 predicate(n->as_Vector()->length() == 4); 9872 match(Set dst (URShiftVI dst shift)); 9873 format %{ "psrld $dst,$shift\t! logical right shift packed4I" %} 9874 ins_encode %{ 9875 __ psrld($dst$$XMMRegister, (int)$shift$$constant); 9876 %} 9877 ins_pipe( pipe_slow ); 9878 %} 9879 9880 instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{ 9881 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 9882 match(Set dst (URShiftVI src shift)); 9883 format %{ "vpsrld $dst,$src,$shift\t! 
logical right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector logical right shift
instruct vsrl2L(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8L_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------- ArithmeticRightShift -----------------------------------

// Shorts/Chars vector arithmetic right shift
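// Unlike the logical shifts above, an arithmetic right shift replicates the
// sign bit into the vacated positions: a 16-bit lane holding 0x8000 shifted
// right by one becomes 0xC000 with psraw but 0x4000 with psrlw. This matches
// Java's >> on the sign-extended value, so RShiftVS is safe for negative
// short data as well.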
instruct vsra2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_avx(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t!
arithmetic right shift packed2S" %} 10060 ins_encode %{ 10061 int vector_len = 0; 10062 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10063 %} 10064 ins_pipe( pipe_slow ); 10065 %} 10066 10067 instruct vsra2S_reg_evex(vecS dst, vecS src, vecS shift) %{ 10068 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 10069 match(Set dst (RShiftVS src shift)); 10070 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} 10071 ins_encode %{ 10072 int vector_len = 0; 10073 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10074 %} 10075 ins_pipe( pipe_slow ); 10076 %} 10077 10078 instruct vsra2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{ 10079 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 10080 match(Set dst (RShiftVS dst shift)); 10081 effect(TEMP src); 10082 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} 10083 ins_encode %{ 10084 int vector_len = 0; 10085 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10086 %} 10087 ins_pipe( pipe_slow ); 10088 %} 10089 10090 instruct vsra2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{ 10091 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 10092 match(Set dst (RShiftVS src shift)); 10093 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} 10094 ins_encode %{ 10095 int vector_len = 0; 10096 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10097 %} 10098 ins_pipe( pipe_slow ); 10099 %} 10100 10101 instruct vsra2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{ 10102 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 10103 match(Set dst (RShiftVS src shift)); 10104 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} 10105 ins_encode %{ 10106 int vector_len = 0; 10107 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10108 %} 10109 ins_pipe( pipe_slow ); 10110 %} 10111 10112 instruct vsra2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{ 10113 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 10114 match(Set dst (RShiftVS dst shift)); 10115 effect(TEMP src); 10116 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} 10117 ins_encode %{ 10118 int vector_len = 0; 10119 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10120 %} 10121 ins_pipe( pipe_slow ); 10122 %} 10123 10124 instruct vsra4S(vecD dst, vecS shift) %{ 10125 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 10126 match(Set dst (RShiftVS dst shift)); 10127 format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} 10128 ins_encode %{ 10129 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); 10130 %} 10131 ins_pipe( pipe_slow ); 10132 %} 10133 10134 instruct vsra4S_imm(vecD dst, immI8 shift) %{ 10135 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 10136 match(Set dst (RShiftVS dst shift)); 10137 format %{ "psraw $dst,$shift\t! 

instruct vsra4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_avx(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_evex(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_avx(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_evex(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_avx(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_evex(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
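
// Packed 32S (512-bit) word shifts require AVX512BW, so only the evex
// variants exist at this size; there are no avx or evex_special forms.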

instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector arithmetic right shift
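// Illustrative (not normative) example of a loop shape that maps onto
// the RShiftVI rules below, assuming superword vectorization applies:
//
//   for (int i = 0; i < a.length; i++) {
//     a[i] = a[i] >> 2;   // RShiftVI with an immI8 count
//   }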
instruct vsra2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// There are no vector arithmetic right shift rules for longs: x86 has no
// packed 64-bit arithmetic shift below AVX-512 (vpsraq is EVEX-only).

// --------------------------------- AND --------------------------------------
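// The logical-op rules match on the vector length in bytes, so they are
// element-type agnostic. Illustrative (not normative) Java source:
//
//   for (int i = 0; i < a.length; i++) {
//     a[i] = a[i] & b[i];   // AndV; the _mem forms fold a LoadVector
//   }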

instruct vand4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (4 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (8 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (16 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
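
// 32-byte forms require AVX2 (UseAVX > 1) and 64-byte forms require
// AVX-512 (UseAVX > 2); there are no two-operand SSE forms at these
// sizes, only the three-operand reg/mem variants below.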

instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- OR ---------------------------------------
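// As with AND above, OrV matches on length in bytes; illustratively:
//
//   a[i] = a[i] | b[i];   // OrV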

instruct vor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (4 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (8 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (16 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- XOR --------------------------------------
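// Likewise for XorV; illustratively:
//
//   a[i] = a[i] ^ b[i];   // XorV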

instruct vxor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}