//
// Copyright (c) 2011, 2014, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def" name ( register save type, C convention save type,
//                  ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers. 512-bit registers or 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperWord flags).
// For pre-EVEX-enabled architectures:
//   XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX-enabled architectures:
//   XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
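// As an illustrative reading of the format above (not an additional
// definition), the first entry below,
//   reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
// declares the low word of xmm0 as save-on-call under both the VM and the
// C calling conventions, spilled and restored as a float (Op_RegF), with
// hardware encoding 0; its "b" through "p" companions cover the remaining
// fifteen 32-bit words of the 512-bit register via next(1)..next(15).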
//
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM31 preserved across function calls
//              XMM0-XMM3 might hold parameters

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

#ifdef _WIN64

reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(15));

reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(15));
reg_def XMM31 ( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(15));

#else // _WIN64

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

#ifdef _LP64

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
xmm23->as_VMReg()->next(8)); 924 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9)); 925 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10)); 926 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11)); 927 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12)); 928 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13)); 929 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14)); 930 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15)); 931 932 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()); 933 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1)); 934 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2)); 935 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3)); 936 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4)); 937 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5)); 938 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6)); 939 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7)); 940 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8)); 941 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9)); 942 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10)); 943 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11)); 944 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12)); 945 reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13)); 946 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14)); 947 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15)); 948 949 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()); 950 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1)); 951 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2)); 952 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3)); 953 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4)); 954 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5)); 955 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6)); 956 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7)); 957 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8)); 958 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9)); 959 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10)); 960 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11)); 961 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12)); 962 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13)); 963 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14)); 964 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15)); 965 966 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()); 967 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1)); 968 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2)); 969 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3)); 970 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4)); 971 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5)); 972 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6)); 973 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7)); 974 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8)); 975 reg_def XMM26j( SOC, SOC, Op_RegF, 26, 
xmm26->as_VMReg()->next(9)); 976 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10)); 977 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11)); 978 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12)); 979 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13)); 980 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14)); 981 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15)); 982 983 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()); 984 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1)); 985 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2)); 986 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3)); 987 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4)); 988 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5)); 989 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6)); 990 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7)); 991 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8)); 992 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9)); 993 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10)); 994 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11)); 995 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12)); 996 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13)); 997 reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14)); 998 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15)); 999 1000 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()); 1001 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1)); 1002 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2)); 1003 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3)); 1004 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4)); 1005 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5)); 1006 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6)); 1007 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7)); 1008 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8)); 1009 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9)); 1010 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10)); 1011 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11)); 1012 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12)); 1013 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13)); 1014 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14)); 1015 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15)); 1016 1017 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()); 1018 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1)); 1019 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2)); 1020 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3)); 1021 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4)); 1022 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5)); 1023 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6)); 1024 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7)); 1025 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8)); 1026 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9)); 1027 reg_def XMM29k( SOC, SOC, Op_RegF, 
29, xmm29->as_VMReg()->next(10)); 1028 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11)); 1029 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12)); 1030 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13)); 1031 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14)); 1032 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15)); 1033 1034 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()); 1035 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1)); 1036 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2)); 1037 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3)); 1038 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4)); 1039 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5)); 1040 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6)); 1041 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7)); 1042 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8)); 1043 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9)); 1044 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10)); 1045 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11)); 1046 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12)); 1047 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13)); 1048 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14)); 1049 reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15)); 1050 1051 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()); 1052 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1)); 1053 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2)); 1054 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3)); 1055 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4)); 1056 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5)); 1057 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6)); 1058 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7)); 1059 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8)); 1060 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9)); 1061 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10)); 1062 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11)); 1063 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12)); 1064 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13)); 1065 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14)); 1066 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15)); 1067 1068 #endif // _LP64 1069 1070 #endif // _WIN64 1071 1072 #ifdef _LP64 1073 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad()); 1074 #else 1075 reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad()); 1076 #endif // _LP64 1077 1078 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1079 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1080 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1081 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1082 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, 
XMM4o, XMM4p, 1083 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1084 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1085 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1086 #ifdef _LP64 1087 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1088 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1089 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1090 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1091 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1092 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1093 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1094 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1095 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 1096 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 1097 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 1098 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 1099 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 1100 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 1101 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 1102 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 1103 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 1104 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 1105 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 1106 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 1107 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 1108 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 1109 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1110 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1111 #endif 1112 ); 1113 1114 // flags 
allocation class should be last. 1115 alloc_class chunk2(RFLAGS); 1116 1117 // Singleton class for condition codes 1118 reg_class int_flags(RFLAGS); 1119 1120 // Class for pre evex float registers 1121 reg_class float_reg_legacy(XMM0, 1122 XMM1, 1123 XMM2, 1124 XMM3, 1125 XMM4, 1126 XMM5, 1127 XMM6, 1128 XMM7 1129 #ifdef _LP64 1130 ,XMM8, 1131 XMM9, 1132 XMM10, 1133 XMM11, 1134 XMM12, 1135 XMM13, 1136 XMM14, 1137 XMM15 1138 #endif 1139 ); 1140 1141 // Class for evex float registers 1142 reg_class float_reg_evex(XMM0, 1143 XMM1, 1144 XMM2, 1145 XMM3, 1146 XMM4, 1147 XMM5, 1148 XMM6, 1149 XMM7 1150 #ifdef _LP64 1151 ,XMM8, 1152 XMM9, 1153 XMM10, 1154 XMM11, 1155 XMM12, 1156 XMM13, 1157 XMM14, 1158 XMM15, 1159 XMM16, 1160 XMM17, 1161 XMM18, 1162 XMM19, 1163 XMM20, 1164 XMM21, 1165 XMM22, 1166 XMM23, 1167 XMM24, 1168 XMM25, 1169 XMM26, 1170 XMM27, 1171 XMM28, 1172 XMM29, 1173 XMM30, 1174 XMM31 1175 #endif 1176 ); 1177 1178 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} ); 1179 1180 // Class for pre evex double registers 1181 reg_class double_reg_legacy(XMM0, XMM0b, 1182 XMM1, XMM1b, 1183 XMM2, XMM2b, 1184 XMM3, XMM3b, 1185 XMM4, XMM4b, 1186 XMM5, XMM5b, 1187 XMM6, XMM6b, 1188 XMM7, XMM7b 1189 #ifdef _LP64 1190 ,XMM8, XMM8b, 1191 XMM9, XMM9b, 1192 XMM10, XMM10b, 1193 XMM11, XMM11b, 1194 XMM12, XMM12b, 1195 XMM13, XMM13b, 1196 XMM14, XMM14b, 1197 XMM15, XMM15b 1198 #endif 1199 ); 1200 1201 // Class for evex double registers 1202 reg_class double_reg_evex(XMM0, XMM0b, 1203 XMM1, XMM1b, 1204 XMM2, XMM2b, 1205 XMM3, XMM3b, 1206 XMM4, XMM4b, 1207 XMM5, XMM5b, 1208 XMM6, XMM6b, 1209 XMM7, XMM7b 1210 #ifdef _LP64 1211 ,XMM8, XMM8b, 1212 XMM9, XMM9b, 1213 XMM10, XMM10b, 1214 XMM11, XMM11b, 1215 XMM12, XMM12b, 1216 XMM13, XMM13b, 1217 XMM14, XMM14b, 1218 XMM15, XMM15b, 1219 XMM16, XMM16b, 1220 XMM17, XMM17b, 1221 XMM18, XMM18b, 1222 XMM19, XMM19b, 1223 XMM20, XMM20b, 1224 XMM21, XMM21b, 1225 XMM22, XMM22b, 1226 XMM23, XMM23b, 1227 XMM24, XMM24b, 1228 XMM25, XMM25b, 1229 XMM26, XMM26b, 1230 XMM27, XMM27b, 1231 XMM28, XMM28b, 1232 XMM29, XMM29b, 1233 XMM30, XMM30b, 1234 XMM31, XMM31b 1235 #endif 1236 ); 1237 1238 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} ); 1239 1240 // Class for pre evex 32bit vector registers 1241 reg_class vectors_reg_legacy(XMM0, 1242 XMM1, 1243 XMM2, 1244 XMM3, 1245 XMM4, 1246 XMM5, 1247 XMM6, 1248 XMM7 1249 #ifdef _LP64 1250 ,XMM8, 1251 XMM9, 1252 XMM10, 1253 XMM11, 1254 XMM12, 1255 XMM13, 1256 XMM14, 1257 XMM15 1258 #endif 1259 ); 1260 1261 // Class for evex 32bit vector registers 1262 reg_class vectors_reg_evex(XMM0, 1263 XMM1, 1264 XMM2, 1265 XMM3, 1266 XMM4, 1267 XMM5, 1268 XMM6, 1269 XMM7 1270 #ifdef _LP64 1271 ,XMM8, 1272 XMM9, 1273 XMM10, 1274 XMM11, 1275 XMM12, 1276 XMM13, 1277 XMM14, 1278 XMM15, 1279 XMM16, 1280 XMM17, 1281 XMM18, 1282 XMM19, 1283 XMM20, 1284 XMM21, 1285 XMM22, 1286 XMM23, 1287 XMM24, 1288 XMM25, 1289 XMM26, 1290 XMM27, 1291 XMM28, 1292 XMM29, 1293 XMM30, 1294 XMM31 1295 #endif 1296 ); 1297 1298 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} ); 1299 1300 // Class for all 64bit vector registers 1301 reg_class vectord_reg_legacy(XMM0, XMM0b, 1302 XMM1, XMM1b, 1303 XMM2, XMM2b, 1304 XMM3, XMM3b, 1305 XMM4, XMM4b, 1306 XMM5, XMM5b, 1307 XMM6, XMM6b, 1308 XMM7, XMM7b 1309 #ifdef _LP64 1310 ,XMM8, XMM8b, 1311 XMM9, XMM9b, 1312 XMM10, XMM10b, 1313 XMM11, XMM11b, 1314 XMM12, XMM12b, 1315 XMM13, XMM13b, 1316 
XMM14, XMM14b, 1317 XMM15, XMM15b 1318 #endif 1319 ); 1320 1321 // Class for all 64bit vector registers 1322 reg_class vectord_reg_evex(XMM0, XMM0b, 1323 XMM1, XMM1b, 1324 XMM2, XMM2b, 1325 XMM3, XMM3b, 1326 XMM4, XMM4b, 1327 XMM5, XMM5b, 1328 XMM6, XMM6b, 1329 XMM7, XMM7b 1330 #ifdef _LP64 1331 ,XMM8, XMM8b, 1332 XMM9, XMM9b, 1333 XMM10, XMM10b, 1334 XMM11, XMM11b, 1335 XMM12, XMM12b, 1336 XMM13, XMM13b, 1337 XMM14, XMM14b, 1338 XMM15, XMM15b, 1339 XMM16, XMM16b, 1340 XMM17, XMM17b, 1341 XMM18, XMM18b, 1342 XMM19, XMM19b, 1343 XMM20, XMM20b, 1344 XMM21, XMM21b, 1345 XMM22, XMM22b, 1346 XMM23, XMM23b, 1347 XMM24, XMM24b, 1348 XMM25, XMM25b, 1349 XMM26, XMM26b, 1350 XMM27, XMM27b, 1351 XMM28, XMM28b, 1352 XMM29, XMM29b, 1353 XMM30, XMM30b, 1354 XMM31, XMM31b 1355 #endif 1356 ); 1357 1358 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} ); 1359 1360 // Class for all 128bit vector registers 1361 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, 1362 XMM1, XMM1b, XMM1c, XMM1d, 1363 XMM2, XMM2b, XMM2c, XMM2d, 1364 XMM3, XMM3b, XMM3c, XMM3d, 1365 XMM4, XMM4b, XMM4c, XMM4d, 1366 XMM5, XMM5b, XMM5c, XMM5d, 1367 XMM6, XMM6b, XMM6c, XMM6d, 1368 XMM7, XMM7b, XMM7c, XMM7d 1369 #ifdef _LP64 1370 ,XMM8, XMM8b, XMM8c, XMM8d, 1371 XMM9, XMM9b, XMM9c, XMM9d, 1372 XMM10, XMM10b, XMM10c, XMM10d, 1373 XMM11, XMM11b, XMM11c, XMM11d, 1374 XMM12, XMM12b, XMM12c, XMM12d, 1375 XMM13, XMM13b, XMM13c, XMM13d, 1376 XMM14, XMM14b, XMM14c, XMM14d, 1377 XMM15, XMM15b, XMM15c, XMM15d 1378 #endif 1379 ); 1380 1381 // Class for all 128bit vector registers 1382 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, 1383 XMM1, XMM1b, XMM1c, XMM1d, 1384 XMM2, XMM2b, XMM2c, XMM2d, 1385 XMM3, XMM3b, XMM3c, XMM3d, 1386 XMM4, XMM4b, XMM4c, XMM4d, 1387 XMM5, XMM5b, XMM5c, XMM5d, 1388 XMM6, XMM6b, XMM6c, XMM6d, 1389 XMM7, XMM7b, XMM7c, XMM7d 1390 #ifdef _LP64 1391 ,XMM8, XMM8b, XMM8c, XMM8d, 1392 XMM9, XMM9b, XMM9c, XMM9d, 1393 XMM10, XMM10b, XMM10c, XMM10d, 1394 XMM11, XMM11b, XMM11c, XMM11d, 1395 XMM12, XMM12b, XMM12c, XMM12d, 1396 XMM13, XMM13b, XMM13c, XMM13d, 1397 XMM14, XMM14b, XMM14c, XMM14d, 1398 XMM15, XMM15b, XMM15c, XMM15d, 1399 XMM16, XMM16b, XMM16c, XMM16d, 1400 XMM17, XMM17b, XMM17c, XMM17d, 1401 XMM18, XMM18b, XMM18c, XMM18d, 1402 XMM19, XMM19b, XMM19c, XMM19d, 1403 XMM20, XMM20b, XMM20c, XMM20d, 1404 XMM21, XMM21b, XMM21c, XMM21d, 1405 XMM22, XMM22b, XMM22c, XMM22d, 1406 XMM23, XMM23b, XMM23c, XMM23d, 1407 XMM24, XMM24b, XMM24c, XMM24d, 1408 XMM25, XMM25b, XMM25c, XMM25d, 1409 XMM26, XMM26b, XMM26c, XMM26d, 1410 XMM27, XMM27b, XMM27c, XMM27d, 1411 XMM28, XMM28b, XMM28c, XMM28d, 1412 XMM29, XMM29b, XMM29c, XMM29d, 1413 XMM30, XMM30b, XMM30c, XMM30d, 1414 XMM31, XMM31b, XMM31c, XMM31d 1415 #endif 1416 ); 1417 1418 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} ); 1419 1420 // Class for all 256bit vector registers 1421 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1422 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1423 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1424 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 1425 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1426 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1427 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1428 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1429 #ifdef _LP64 1430 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1431 XMM9, XMM9b, 
XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1432 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1433 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1434 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1435 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1436 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1437 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h 1438 #endif 1439 ); 1440 1441 // Class for all 256bit vector registers 1442 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1443 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1444 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1445 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 1446 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1447 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1448 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1449 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1450 #ifdef _LP64 1451 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1452 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1453 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1454 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1455 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1456 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1457 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1458 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, 1459 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, 1460 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, 1461 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, 1462 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, 1463 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, 1464 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, 1465 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, 1466 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, 1467 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, 1468 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, 1469 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, 1470 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, 1471 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, 1472 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, 1473 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, 1474 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h 1475 #endif 1476 ); 1477 1478 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} ); 1479 1480 // Class for all 512bit vector registers 1481 reg_class vectorz_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1482 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1483 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1484 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1485 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1486 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, 
XMM5o, XMM5p, 1487 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1488 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1489 #ifdef _LP64 1490 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1491 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1492 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1493 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1494 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1495 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1496 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1497 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1498 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 1499 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 1500 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 1501 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 1502 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 1503 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 1504 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 1505 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 1506 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 1507 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 1508 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 1509 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 1510 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 1511 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 1512 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1513 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1514 #endif 1515 ); 1516 1517 %} 1518 1519 1520 //----------SOURCE BLOCK------------------------------------------------------- 1521 // This is a block of 
C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description

source_hpp %{
// Header information of the source block.
// Method declarations/definitions which are used outside
// the ad-scope can conveniently be defined here.
//
// To keep related declarations/definitions/uses close together,
// we switch between source %{ }% and source_hpp %{ }% freely as needed.

class NativeJump;

class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};

class HandlerImpl {

 public:

  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  static uint size_exception_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization.  (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return NativeJump::instruction_size;
  }

#ifdef _LP64
  static uint size_deopt_handler() {
    // three 5 byte instructions
    return 15;
  }
#else
  static uint size_deopt_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // The deopt handler starts out as a jump and can be patched to
    // a call by deoptimization.  (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return 5 + NativeJump::instruction_size; // pushl(); jmp;
  }
#endif
};

%} // end source_hpp

source %{

// Emit exception handler code.
// Stuff framesize into a register and call a VM stub routine.
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL)  return 0;  // CodeBuffer::expand failed
  int offset = __ offset();
  __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
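// Both emit_exception_handler above and emit_deopt_handler below follow
// the same stub protocol, sketched informally here (this is a summary of
// the pattern, not an additional handler):
//
//   address base = __ start_a_stub(max_size);  // reserve stub space
//   if (base == NULL)  return 0;               // CodeBuffer::expand failed
//   int offset = __ offset();                  // record stub start
//   ... emit at most max_size bytes ...
//   __ end_a_stub();                           // close the reservation
//   return offset;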
// Emit deopt handler code.
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL)  return 0;  // CodeBuffer::expand failed
  int offset = __ offset();

#ifdef _LP64
  address the_pc = (address) __ pc();
  Label next;
  // push a "the_pc" on the stack without destroying any registers
  // as they all may be live.

  // push address of "next"
  __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
  __ bind(next);
  // adjust it so it matches "the_pc"
  __ subptr(Address(rsp, 0), __ offset() - offset);
#else
  InternalAddress here(__ pc());
  __ pushptr(here.addr());
#endif

  __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
  __ end_a_stub();
  return offset;
}


//=============================================================================

// Float masks come from different places depending on platform.
#ifdef _LP64
static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
#else
static address float_signmask()  { return (address)float_signmask_pool; }
static address float_signflip()  { return (address)float_signflip_pool; }
static address double_signmask() { return (address)double_signmask_pool; }
static address double_signflip() { return (address)double_signflip_pool; }
#endif


const bool Matcher::match_rule_supported(int opcode) {
  if (!has_match_rule(opcode))
    return false;

  switch (opcode) {
    case Op_PopCountI:
    case Op_PopCountL:
      if (!UsePopCountInstruction)
        return false;
      break;
    case Op_MulVI:
      if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
        return false;
      break;
    case Op_MulVL:
    case Op_MulReductionVL:
      if (!VM_Version::supports_avx512dq())
        return false;
      break;
    case Op_AddReductionVL:
      if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here
        return false;
      break;
    case Op_AddReductionVI:
      if (UseSSE < 3) // requires at least SSE3
        return false;
      break;
    case Op_MulReductionVI:
      if (UseSSE < 4) // requires at least SSE4
        return false;
      break;
    case Op_AddReductionVF:
    case Op_AddReductionVD:
    case Op_MulReductionVF:
    case Op_MulReductionVD:
      if (UseSSE < 1) // requires at least SSE
        return false;
      break;
    case Op_CompareAndSwapL:
#ifdef _LP64
    case Op_CompareAndSwapP:
#endif
      if (!VM_Version::supports_cx8())
        return false;
      break;
  }

  return true;  // By default match rules are supported.
}
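// Worked example (informal): on a CPU with SSE3 but not SSE4.1
// (UseSSE == 3, UseAVX == 0), Op_AddReductionVI stays supported while
// Op_MulVI and Op_MulReductionVI are rejected above, so C2 falls back
// to the scalar form for those operations instead of vectorizing them.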
// Max vector size in bytes. 0 if not supported.
const int Matcher::vector_width_in_bytes(BasicType bt) {
  assert(is_java_primitive(bt), "only primitive type vectors");
  if (UseSSE < 2) return 0;
  // SSE2 supports 128bit vectors for all types.
  // AVX2 supports 256bit vectors for all types.
  // EVEX (AVX-512) supports 512bit vectors for all types.
  int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
  // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
  if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
    size = (UseAVX > 2) ? 64 : 32;
  // Use flag to limit vector size.
  size = MIN2(size,(int)MaxVectorSize);
  // Minimum 2 values in vector (or 4 for bytes).
  switch (bt) {
  case T_DOUBLE:
  case T_LONG:
    if (size < 16) return 0;
    // fall through: once size >= 16 the narrower checks below cannot fire
  case T_FLOAT:
  case T_INT:
    if (size < 8) return 0;
    // fall through
  case T_BOOLEAN:
  case T_BYTE:
  case T_CHAR:
  case T_SHORT:
    if (size < 4) return 0;
    break;
  default:
    ShouldNotReachHere();
  }
  return size;
}

// Limits on vector size (number of elements) loaded into vector.
const int Matcher::max_vector_size(const BasicType bt) {
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}
const int Matcher::min_vector_size(const BasicType bt) {
  int max_size = max_vector_size(bt);
  // Min size which can be loaded into vector is 4 bytes.
  int size = (type2aelembytes(bt) == 1) ? 4 : 2;
  return MIN2(size,max_size);
}
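// Worked example (informal): with UseAVX == 2 and MaxVectorSize == 32,
// vector_width_in_bytes(T_INT) is (1 << 2) * 8 = 32, so
// max_vector_size(T_INT) is 8 ints and min_vector_size(T_INT) is 2;
// for T_BYTE the maximum is 32 elements and the minimum is 4,
// matching the 4-byte load floor noted above.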
// Vector ideal reg corresponding to specified size in bytes
const int Matcher::vector_ideal_reg(int size) {
  assert(MaxVectorSize >= size, "");
  switch(size) {
    case  4: return Op_VecS;
    case  8: return Op_VecD;
    case 16: return Op_VecX;
    case 32: return Op_VecY;
    case 64: return Op_VecZ;
  }
  ShouldNotReachHere();
  return 0;
}

// Only lowest bits of xmm reg are used for vector shift count.
const int Matcher::vector_shift_count_ideal_reg(int size) {
  return Op_VecS;
}

// x86 supports misaligned vector loads/stores.
const bool Matcher::misaligned_vectors_ok() {
  return !AlignVector; // can be changed by flag
}

// x86 AES instructions are compatible with SunJCE expanded
// keys, hence we do not need to pass the original key to stubs
const bool Matcher::pass_original_key_for_aes() {
  return false;
}

// Helper methods for MachSpillCopyNode::implementation().
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st) {
  // In 64-bit VM size calculation is very complex. Emitting instructions
  // into scratch buffer is used to get size in 64-bit VM.
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
  assert(ireg == Op_VecS || // 32bit vector
         (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
         (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
         "no non-adjacent vector moves" );
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    switch (ireg) {
    case Op_VecS: // copy whole register
    case Op_VecD:
    case Op_VecX:
      __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      break;
    case Op_VecY:
      __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      break;
    case Op_VecZ:
      __ evmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
      break;
    default:
      ShouldNotReachHere();
    }
    int size = __ offset() - offset;
#ifdef ASSERT
    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    assert(!do_size || size == 4, "incorrect size calculation");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    switch (ireg) {
    case Op_VecS:
    case Op_VecD:
    case Op_VecX:
      st->print("movdqu  %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    case Op_VecY:
    case Op_VecZ:
      st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  return (UseAVX > 2) ? 6 : 4;
}
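// Worked example (informal): copying a 128-bit Op_VecX value between
// registers emits a single "movdqu xmm_dst, xmm_src", which the helper
// above sizes at 4 bytes (6 with the longer EVEX prefix when
// UseAVX > 2); the stack-spill variant below adds the displacement
// bytes of the [rsp + offset] addressing mode on top of its base size.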
static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
                            int stack_offset, int reg, uint ireg, outputStream* st) {
  // In 64-bit VM size calculation is very complex. Emitting instructions
  // into scratch buffer is used to get size in 64-bit VM.
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecD:
        __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecX:
        __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecY:
        __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecZ:
        __ evmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecD:
        __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecX:
        __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecY:
        __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecZ:
        __ evmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
        break;
      default:
        ShouldNotReachHere();
      }
    }
    int size = __ offset() - offset;
#ifdef ASSERT
    int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4);
    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    assert(!do_size || size == (5+offset_size), "incorrect size calculation");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecD:
        st->print("movq    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecX:
        st->print("movdqu  %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecY:
      case Op_VecZ:
        st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        st->print("movd    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecD:
        st->print("movq    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecX:
        st->print("movdqu  [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecY:
      case Op_VecZ:
        st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      default:
        ShouldNotReachHere();
      }
    }
#endif
  }
  int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4);
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return 5+offset_size;
}

static inline jfloat replicate4_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 32bit.
  // E.g. con = 0x12, width = 1 doubles up as 0x12 -> 0x1212 -> 0x12121212.
  assert(width == 1 || width == 2, "only byte or short types here");
  int bit_width = width * 8;
  jint val = con;
  val &= (1 << bit_width) - 1;  // mask off sign bits
  while(bit_width < 32) {
    val |= (val << bit_width);
    bit_width <<= 1;
  }
  jfloat fval = *((jfloat*) &val);  // coerce to float type
  return fval;
}

static inline jdouble replicate8_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
1947 assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here"); 1948 int bit_width = width * 8; 1949 jlong val = con; 1950 val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits 1951 while(bit_width < 64) { 1952 val |= (val << bit_width); 1953 bit_width <<= 1; 1954 } 1955 jdouble dval = *((jdouble*) &val); // coerce to double type 1956 return dval; 1957 } 1958 1959 #ifndef PRODUCT 1960 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 1961 st->print("nop \t# %d bytes pad for loops and calls", _count); 1962 } 1963 #endif 1964 1965 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { 1966 MacroAssembler _masm(&cbuf); 1967 __ nop(_count); 1968 } 1969 1970 uint MachNopNode::size(PhaseRegAlloc*) const { 1971 return _count; 1972 } 1973 1974 #ifndef PRODUCT 1975 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 1976 st->print("# breakpoint"); 1977 } 1978 #endif 1979 1980 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { 1981 MacroAssembler _masm(&cbuf); 1982 __ int3(); 1983 } 1984 1985 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 1986 return MachNode::size(ra_); 1987 } 1988 1989 %} 1990 1991 encode %{ 1992 1993 enc_class call_epilog %{ 1994 if (VerifyStackAtCalls) { 1995 // Check that stack depth is unchanged: find majik cookie on stack 1996 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 1997 MacroAssembler _masm(&cbuf); 1998 Label L; 1999 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); 2000 __ jccb(Assembler::equal, L); 2001 // Die if stack mismatch 2002 __ int3(); 2003 __ bind(L); 2004 } 2005 %} 2006 2007 %} 2008 2009 2010 //----------OPERANDS----------------------------------------------------------- 2011 // Operand definitions must precede instruction definitions for correct parsing 2012 // in the ADLC because operands constitute user defined types which are used in 2013 // instruction definitions. 
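// A note on the arithmetic instructs that follow (informal): each scalar
// FP operation is defined twice because the SSE encodings are destructive
// two-operand forms, e.g.
//
//   addss  xmm0, xmm1        // xmm0 = xmm0 + xmm1  (matches "AddF dst src")
//
// while the AVX encodings are non-destructive three-operand forms, e.g.
//
//   vaddss xmm0, xmm1, xmm2  // xmm0 = xmm1 + xmm2  (matches "AddF src1 src2")
//
// which is why the UseAVX == 0 instructs tie dst to the first input while
// the UseAVX > 0 instructs take two independent sources.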
2014 2015 // This one generically applies only for evex, so only one version 2016 operand vecZ() %{ 2017 constraint(ALLOC_IN_RC(vectorz_reg)); 2018 match(VecZ); 2019 2020 format %{ %} 2021 interface(REG_INTER); 2022 %} 2023 2024 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 2025 2026 // ============================================================================ 2027 2028 instruct ShouldNotReachHere() %{ 2029 match(Halt); 2030 format %{ "int3\t# ShouldNotReachHere" %} 2031 ins_encode %{ 2032 __ int3(); 2033 %} 2034 ins_pipe(pipe_slow); 2035 %} 2036 2037 // ============================================================================ 2038 2039 instruct addF_reg(regF dst, regF src) %{ 2040 predicate((UseSSE>=1) && (UseAVX == 0)); 2041 match(Set dst (AddF dst src)); 2042 2043 format %{ "addss $dst, $src" %} 2044 ins_cost(150); 2045 ins_encode %{ 2046 __ addss($dst$$XMMRegister, $src$$XMMRegister); 2047 %} 2048 ins_pipe(pipe_slow); 2049 %} 2050 2051 instruct addF_mem(regF dst, memory src) %{ 2052 predicate((UseSSE>=1) && (UseAVX == 0)); 2053 match(Set dst (AddF dst (LoadF src))); 2054 2055 format %{ "addss $dst, $src" %} 2056 ins_cost(150); 2057 ins_encode %{ 2058 __ addss($dst$$XMMRegister, $src$$Address); 2059 %} 2060 ins_pipe(pipe_slow); 2061 %} 2062 2063 instruct addF_imm(regF dst, immF con) %{ 2064 predicate((UseSSE>=1) && (UseAVX == 0)); 2065 match(Set dst (AddF dst con)); 2066 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2067 ins_cost(150); 2068 ins_encode %{ 2069 __ addss($dst$$XMMRegister, $constantaddress($con)); 2070 %} 2071 ins_pipe(pipe_slow); 2072 %} 2073 2074 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 2075 predicate(UseAVX > 0); 2076 match(Set dst (AddF src1 src2)); 2077 2078 format %{ "vaddss $dst, $src1, $src2" %} 2079 ins_cost(150); 2080 ins_encode %{ 2081 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2082 %} 2083 ins_pipe(pipe_slow); 2084 %} 2085 2086 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 2087 predicate(UseAVX > 0); 2088 match(Set dst (AddF src1 (LoadF src2))); 2089 2090 format %{ "vaddss $dst, $src1, $src2" %} 2091 ins_cost(150); 2092 ins_encode %{ 2093 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2094 %} 2095 ins_pipe(pipe_slow); 2096 %} 2097 2098 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 2099 predicate(UseAVX > 0); 2100 match(Set dst (AddF src con)); 2101 2102 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2103 ins_cost(150); 2104 ins_encode %{ 2105 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2106 %} 2107 ins_pipe(pipe_slow); 2108 %} 2109 2110 instruct addD_reg(regD dst, regD src) %{ 2111 predicate((UseSSE>=2) && (UseAVX == 0)); 2112 match(Set dst (AddD dst src)); 2113 2114 format %{ "addsd $dst, $src" %} 2115 ins_cost(150); 2116 ins_encode %{ 2117 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 2118 %} 2119 ins_pipe(pipe_slow); 2120 %} 2121 2122 instruct addD_mem(regD dst, memory src) %{ 2123 predicate((UseSSE>=2) && (UseAVX == 0)); 2124 match(Set dst (AddD dst (LoadD src))); 2125 2126 format %{ "addsd $dst, $src" %} 2127 ins_cost(150); 2128 ins_encode %{ 2129 __ addsd($dst$$XMMRegister, $src$$Address); 2130 %} 2131 ins_pipe(pipe_slow); 2132 %} 2133 2134 instruct addD_imm(regD dst, immD con) %{ 2135 predicate((UseSSE>=2) && (UseAVX == 0)); 2136 match(Set dst (AddD dst con)); 2137 format %{ "addsd $dst, [$constantaddress]\t# load from 
constant table: double=$con" %} 2138 ins_cost(150); 2139 ins_encode %{ 2140 __ addsd($dst$$XMMRegister, $constantaddress($con)); 2141 %} 2142 ins_pipe(pipe_slow); 2143 %} 2144 2145 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 2146 predicate(UseAVX > 0); 2147 match(Set dst (AddD src1 src2)); 2148 2149 format %{ "vaddsd $dst, $src1, $src2" %} 2150 ins_cost(150); 2151 ins_encode %{ 2152 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2153 %} 2154 ins_pipe(pipe_slow); 2155 %} 2156 2157 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 2158 predicate(UseAVX > 0); 2159 match(Set dst (AddD src1 (LoadD src2))); 2160 2161 format %{ "vaddsd $dst, $src1, $src2" %} 2162 ins_cost(150); 2163 ins_encode %{ 2164 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2165 %} 2166 ins_pipe(pipe_slow); 2167 %} 2168 2169 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 2170 predicate(UseAVX > 0); 2171 match(Set dst (AddD src con)); 2172 2173 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2174 ins_cost(150); 2175 ins_encode %{ 2176 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2177 %} 2178 ins_pipe(pipe_slow); 2179 %} 2180 2181 instruct subF_reg(regF dst, regF src) %{ 2182 predicate((UseSSE>=1) && (UseAVX == 0)); 2183 match(Set dst (SubF dst src)); 2184 2185 format %{ "subss $dst, $src" %} 2186 ins_cost(150); 2187 ins_encode %{ 2188 __ subss($dst$$XMMRegister, $src$$XMMRegister); 2189 %} 2190 ins_pipe(pipe_slow); 2191 %} 2192 2193 instruct subF_mem(regF dst, memory src) %{ 2194 predicate((UseSSE>=1) && (UseAVX == 0)); 2195 match(Set dst (SubF dst (LoadF src))); 2196 2197 format %{ "subss $dst, $src" %} 2198 ins_cost(150); 2199 ins_encode %{ 2200 __ subss($dst$$XMMRegister, $src$$Address); 2201 %} 2202 ins_pipe(pipe_slow); 2203 %} 2204 2205 instruct subF_imm(regF dst, immF con) %{ 2206 predicate((UseSSE>=1) && (UseAVX == 0)); 2207 match(Set dst (SubF dst con)); 2208 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2209 ins_cost(150); 2210 ins_encode %{ 2211 __ subss($dst$$XMMRegister, $constantaddress($con)); 2212 %} 2213 ins_pipe(pipe_slow); 2214 %} 2215 2216 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 2217 predicate(UseAVX > 0); 2218 match(Set dst (SubF src1 src2)); 2219 2220 format %{ "vsubss $dst, $src1, $src2" %} 2221 ins_cost(150); 2222 ins_encode %{ 2223 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2224 %} 2225 ins_pipe(pipe_slow); 2226 %} 2227 2228 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 2229 predicate(UseAVX > 0); 2230 match(Set dst (SubF src1 (LoadF src2))); 2231 2232 format %{ "vsubss $dst, $src1, $src2" %} 2233 ins_cost(150); 2234 ins_encode %{ 2235 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2236 %} 2237 ins_pipe(pipe_slow); 2238 %} 2239 2240 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 2241 predicate(UseAVX > 0); 2242 match(Set dst (SubF src con)); 2243 2244 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2245 ins_cost(150); 2246 ins_encode %{ 2247 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2248 %} 2249 ins_pipe(pipe_slow); 2250 %} 2251 2252 instruct subD_reg(regD dst, regD src) %{ 2253 predicate((UseSSE>=2) && (UseAVX == 0)); 2254 match(Set dst (SubD dst src)); 2255 2256 format %{ "subsd $dst, $src" %} 2257 ins_cost(150); 2258 ins_encode %{ 2259 __ 
subsd($dst$$XMMRegister, $src$$XMMRegister); 2260 %} 2261 ins_pipe(pipe_slow); 2262 %} 2263 2264 instruct subD_mem(regD dst, memory src) %{ 2265 predicate((UseSSE>=2) && (UseAVX == 0)); 2266 match(Set dst (SubD dst (LoadD src))); 2267 2268 format %{ "subsd $dst, $src" %} 2269 ins_cost(150); 2270 ins_encode %{ 2271 __ subsd($dst$$XMMRegister, $src$$Address); 2272 %} 2273 ins_pipe(pipe_slow); 2274 %} 2275 2276 instruct subD_imm(regD dst, immD con) %{ 2277 predicate((UseSSE>=2) && (UseAVX == 0)); 2278 match(Set dst (SubD dst con)); 2279 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2280 ins_cost(150); 2281 ins_encode %{ 2282 __ subsd($dst$$XMMRegister, $constantaddress($con)); 2283 %} 2284 ins_pipe(pipe_slow); 2285 %} 2286 2287 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 2288 predicate(UseAVX > 0); 2289 match(Set dst (SubD src1 src2)); 2290 2291 format %{ "vsubsd $dst, $src1, $src2" %} 2292 ins_cost(150); 2293 ins_encode %{ 2294 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2295 %} 2296 ins_pipe(pipe_slow); 2297 %} 2298 2299 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 2300 predicate(UseAVX > 0); 2301 match(Set dst (SubD src1 (LoadD src2))); 2302 2303 format %{ "vsubsd $dst, $src1, $src2" %} 2304 ins_cost(150); 2305 ins_encode %{ 2306 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2307 %} 2308 ins_pipe(pipe_slow); 2309 %} 2310 2311 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 2312 predicate(UseAVX > 0); 2313 match(Set dst (SubD src con)); 2314 2315 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2316 ins_cost(150); 2317 ins_encode %{ 2318 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2319 %} 2320 ins_pipe(pipe_slow); 2321 %} 2322 2323 instruct mulF_reg(regF dst, regF src) %{ 2324 predicate((UseSSE>=1) && (UseAVX == 0)); 2325 match(Set dst (MulF dst src)); 2326 2327 format %{ "mulss $dst, $src" %} 2328 ins_cost(150); 2329 ins_encode %{ 2330 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 2331 %} 2332 ins_pipe(pipe_slow); 2333 %} 2334 2335 instruct mulF_mem(regF dst, memory src) %{ 2336 predicate((UseSSE>=1) && (UseAVX == 0)); 2337 match(Set dst (MulF dst (LoadF src))); 2338 2339 format %{ "mulss $dst, $src" %} 2340 ins_cost(150); 2341 ins_encode %{ 2342 __ mulss($dst$$XMMRegister, $src$$Address); 2343 %} 2344 ins_pipe(pipe_slow); 2345 %} 2346 2347 instruct mulF_imm(regF dst, immF con) %{ 2348 predicate((UseSSE>=1) && (UseAVX == 0)); 2349 match(Set dst (MulF dst con)); 2350 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2351 ins_cost(150); 2352 ins_encode %{ 2353 __ mulss($dst$$XMMRegister, $constantaddress($con)); 2354 %} 2355 ins_pipe(pipe_slow); 2356 %} 2357 2358 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 2359 predicate(UseAVX > 0); 2360 match(Set dst (MulF src1 src2)); 2361 2362 format %{ "vmulss $dst, $src1, $src2" %} 2363 ins_cost(150); 2364 ins_encode %{ 2365 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2366 %} 2367 ins_pipe(pipe_slow); 2368 %} 2369 2370 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 2371 predicate(UseAVX > 0); 2372 match(Set dst (MulF src1 (LoadF src2))); 2373 2374 format %{ "vmulss $dst, $src1, $src2" %} 2375 ins_cost(150); 2376 ins_encode %{ 2377 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2378 %} 2379 ins_pipe(pipe_slow); 2380 %} 2381 2382 instruct mulF_reg_imm(regF 
dst, regF src, immF con) %{ 2383 predicate(UseAVX > 0); 2384 match(Set dst (MulF src con)); 2385 2386 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2387 ins_cost(150); 2388 ins_encode %{ 2389 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2390 %} 2391 ins_pipe(pipe_slow); 2392 %} 2393 2394 instruct mulD_reg(regD dst, regD src) %{ 2395 predicate((UseSSE>=2) && (UseAVX == 0)); 2396 match(Set dst (MulD dst src)); 2397 2398 format %{ "mulsd $dst, $src" %} 2399 ins_cost(150); 2400 ins_encode %{ 2401 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 2402 %} 2403 ins_pipe(pipe_slow); 2404 %} 2405 2406 instruct mulD_mem(regD dst, memory src) %{ 2407 predicate((UseSSE>=2) && (UseAVX == 0)); 2408 match(Set dst (MulD dst (LoadD src))); 2409 2410 format %{ "mulsd $dst, $src" %} 2411 ins_cost(150); 2412 ins_encode %{ 2413 __ mulsd($dst$$XMMRegister, $src$$Address); 2414 %} 2415 ins_pipe(pipe_slow); 2416 %} 2417 2418 instruct mulD_imm(regD dst, immD con) %{ 2419 predicate((UseSSE>=2) && (UseAVX == 0)); 2420 match(Set dst (MulD dst con)); 2421 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2422 ins_cost(150); 2423 ins_encode %{ 2424 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 2425 %} 2426 ins_pipe(pipe_slow); 2427 %} 2428 2429 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 2430 predicate(UseAVX > 0); 2431 match(Set dst (MulD src1 src2)); 2432 2433 format %{ "vmulsd $dst, $src1, $src2" %} 2434 ins_cost(150); 2435 ins_encode %{ 2436 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2437 %} 2438 ins_pipe(pipe_slow); 2439 %} 2440 2441 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 2442 predicate(UseAVX > 0); 2443 match(Set dst (MulD src1 (LoadD src2))); 2444 2445 format %{ "vmulsd $dst, $src1, $src2" %} 2446 ins_cost(150); 2447 ins_encode %{ 2448 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2449 %} 2450 ins_pipe(pipe_slow); 2451 %} 2452 2453 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 2454 predicate(UseAVX > 0); 2455 match(Set dst (MulD src con)); 2456 2457 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2458 ins_cost(150); 2459 ins_encode %{ 2460 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2461 %} 2462 ins_pipe(pipe_slow); 2463 %} 2464 2465 instruct divF_reg(regF dst, regF src) %{ 2466 predicate((UseSSE>=1) && (UseAVX == 0)); 2467 match(Set dst (DivF dst src)); 2468 2469 format %{ "divss $dst, $src" %} 2470 ins_cost(150); 2471 ins_encode %{ 2472 __ divss($dst$$XMMRegister, $src$$XMMRegister); 2473 %} 2474 ins_pipe(pipe_slow); 2475 %} 2476 2477 instruct divF_mem(regF dst, memory src) %{ 2478 predicate((UseSSE>=1) && (UseAVX == 0)); 2479 match(Set dst (DivF dst (LoadF src))); 2480 2481 format %{ "divss $dst, $src" %} 2482 ins_cost(150); 2483 ins_encode %{ 2484 __ divss($dst$$XMMRegister, $src$$Address); 2485 %} 2486 ins_pipe(pipe_slow); 2487 %} 2488 2489 instruct divF_imm(regF dst, immF con) %{ 2490 predicate((UseSSE>=1) && (UseAVX == 0)); 2491 match(Set dst (DivF dst con)); 2492 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2493 ins_cost(150); 2494 ins_encode %{ 2495 __ divss($dst$$XMMRegister, $constantaddress($con)); 2496 %} 2497 ins_pipe(pipe_slow); 2498 %} 2499 2500 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 2501 predicate(UseAVX > 0); 2502 match(Set dst (DivF src1 src2)); 2503 2504 
instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 src2));

  format %{ "vdivss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 (LoadF src2)));

  format %{ "vdivss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src con));

  format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst src));

  format %{ "divsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst (LoadD src)));

  format %{ "divsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst con));
  format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 src2));

  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 (LoadD src2)));

  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src con));

  format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
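// Absolute value and negation below are pure sign-bit manipulation: ANDing
// with 0x7fffffff (0x7fffffffffffffff for double) clears the sign bit and
// XORing with 0x80000000 (0x8000000000000000) flips it. The masks live in
// memory (float_signmask()/float_signflip() and the double variants), so
// they can be applied directly as a memory operand.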
instruct absF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AbsF dst));
  ins_cost(150);
  format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absF_reg_reg(regF dst, regF src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AbsD dst));
  ins_cost(150);
  format %{ "andpd $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg_reg(regD dst, regD src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

instruct negF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (NegF dst));
  ins_cost(150);
  format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negF_reg_reg(regF dst, regF src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegF src));
  ins_cost(150);
  format %{ "vxorps $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    int vector_len = 0;
    __ vxorps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signflip()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (NegD dst));
  ins_cost(150);
  format %{ "xorpd $dst, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg_reg(regD dst, regD src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegD src));
  ins_cost(150);
  format %{ "vxorpd $dst, $src, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    int vector_len = 0;
    __ vxorpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signflip()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
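// Java only exposes Math.sqrt(double), so a float square root reaches the
// matcher as ConvD2F(SqrtD(ConvF2D src)). The rules below collapse that
// conversion tree into a single sqrtss; this is exact, because the
// correctly rounded double sqrt of a float, rounded back to float, equals
// the correctly rounded float sqrt.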
instruct sqrtF_reg(regF dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_mem(regF dst, memory src) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_imm(regF dst, immF con) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D con))));
  format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_reg(regD dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD src));

  format %{ "sqrtsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_mem(regD dst, memory src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD (LoadD src)));

  format %{ "sqrtsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_imm(regD dst, immD con) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD con));
  format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// ====================VECTOR INSTRUCTIONS=====================================

// Load vectors (4 bytes long)
instruct loadV4(vecS dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movd $dst,$mem\t! load vector (4 bytes)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (8 bytes long)
instruct loadV8(vecD dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movq $dst,$mem\t! load vector (8 bytes)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (16 bytes long)
instruct loadV16(vecX dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %}
  ins_encode %{
    __ movdqu($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (32 bytes long)
instruct loadV32(vecY dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 32);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (64 bytes long)
instruct loadV64(vecZ dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 64);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdqu $dst k0,$mem\t! load vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdqu($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
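// Note: the vector_len argument taken by the EVEX-aware assembler calls in
// this file encodes the operation width rather than a byte count:
// 0 selects 128-bit, 1 selects 256-bit, and 2 selects 512-bit operands.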
// Store vectors
instruct storeV4(memory mem, vecS src) %{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movd $mem,$src\t! store vector (4 bytes)" %}
  ins_encode %{
    __ movdl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV8(memory mem, vecD src) %{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movq $mem,$src\t! store vector (8 bytes)" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV16(memory mem, vecX src) %{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %}
  ins_encode %{
    __ movdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV32(memory mem, vecY src) %{
  predicate(n->as_StoreVector()->memory_size() == 32);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV64(memory mem, vecZ src) %{
  predicate(n->as_StoreVector()->memory_size() == 64);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdqu $mem k0,$src\t! store vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdqu($mem$$Address, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ====================LEGACY REPLICATE========================================
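// The legacy (pre-EVEX) replicate rules synthesize a broadcast out of
// shuffles. For a byte scalar the sequence is, sketched:
//   movd       xmm, gpr    // scalar into bits 31..0
//   punpcklbw  xmm, xmm    // duplicate each low byte: b0 b0 b1 b1 ...
//   pshuflw    xmm, xmm, 0 // broadcast word 0 -> 8 copies in the low qword
//   punpcklqdq xmm, xmm    // duplicate the low qword -> 16 copies
// For 256-bit vectors, vinserti128h then mirrors the low 128 bits into the
// upper half of the ymm register.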
instruct Repl16B(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\t! replicate16B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "punpcklbw $dst,$mem\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\t! replicate16B" %}
  ins_encode %{
    __ punpcklbw($dst$$XMMRegister, $mem$$Address);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate32B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "punpcklbw $dst,$mem\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate32B" %}
  ins_encode %{
    __ punpcklbw($dst$$XMMRegister, $mem$$Address);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
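// For immediates, replicate8_imm(con, width) pre-replicates the constant
// into a single 64-bit constant table entry (e.g. byte 0x41 becomes
// 0x4141414141414141), so one movq plus qword shuffles finishes the
// broadcast without touching a general-purpose register.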
instruct Repl16B_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate16B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate32B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && UseAVX < 3);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate16B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl32B_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 0 && UseAVX < 3);
  match(Set dst (ReplicateB zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8S(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\t! replicate8S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate16S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate8S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && UseAVX < 3);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate8S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16S_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && UseAVX < 3);
  match(Set dst (ReplicateS zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}
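// pshufd/pshuflw take a two-bit-per-element selector: 0x00 picks element 0
// four times (a broadcast), while 0x44 = 01 00 01 00 picks {0,1,0,1} and
// thus duplicates the low 64 bits (used below to replicate a double).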
instruct Repl4I(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate4I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "pshufd $dst,$mem,0x00\t! replicate4I" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "pshufd $dst,$mem,0x00\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t"
            "punpcklqdq $dst,$dst" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && UseAVX < 3);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate4I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8I_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && UseAVX < 3);
  match(Set dst (ReplicateI zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate long (8 byte) scalar to be vector
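// On 32-bit VMs (the #else branch) a long lives in a register pair, so the
// two halves are transferred with separate movdl instructions
// (HIGH_FROM_LOW selects the paired register) and fused with punpckldq
// before the qword is broadcast.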
#ifdef _LP64
instruct Repl4L(vecY dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64

instruct Repl4L_imm(vecY dst, immL con) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4L_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "movq $dst,$mem\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_mem(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && UseAVX < 3);
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "movq $dst,$mem\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! lower replicate4L\n\t"
            "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate4L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
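// Float and double replicates start from an XMM source, so the scalar is
// already in the vector register file and the broadcast is pure shuffling;
// no GPR-to-XMM transfer is needed.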
instruct Repl8F(vecY dst, regF src) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8F_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "pshufd $dst,$mem,0x00\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4D(vecY dst, regD src) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4D_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "pshufd $dst,$mem,0x44\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// ====================GENERIC REPLICATE=======================================
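// These generic forms carry no UseAVX/AVX-512 terms in their predicates
// beyond the vector length, so they cover the small (4- and 8-byte, plus
// some 16-byte) vectors on any CPU meeting the platform's baseline SSE
// level.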
// Replicate byte scalar to be vector
instruct Repl4B(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4B_mem(vecS dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "punpcklbw $dst,$mem\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4B" %}
  ins_encode %{
    __ punpcklbw($dst$$XMMRegister, $mem$$Address);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate8B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "punpcklbw $dst,$mem\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate8B" %}
  ins_encode %{
    __ punpcklbw($dst$$XMMRegister, $mem$$Address);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate byte scalar immediate to be vector by loading from const table.
instruct Repl4B_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB con));
  format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1)));
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
  %}
  ins_pipe( pipe_slow );
%}
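// Zeroing relies on the xor-with-self idiom: pxor xmm,xmm is recognized by
// the hardware as a dependency-breaking zeroing of the register, so no
// constant load is needed.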
// Replicate byte scalar zero to be vector
instruct Repl4B_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate4B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8B_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate8B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate char/short (2 byte) scalar to be vector
instruct Repl2S(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate2S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "pshuflw $dst,$mem,0x00\t! replicate4S" %}
  ins_encode %{
    __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
instruct Repl2S_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS con));
  format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate char/short (2 byte) scalar zero to be vector
instruct Repl2S_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate2S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate4S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar to be vector
instruct Repl2I(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Integer could be loaded into xmm register directly from memory.
instruct Repl2I_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
instruct Repl2I_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar zero to be vector
instruct Repl2I_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate2I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate long (8 byte) scalar to be vector
#ifdef _LP64
instruct Repl2L(vecX dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64

// Replicate long (8 byte) scalar immediate to be vector by loading from const table.
instruct Repl2L_imm(vecX dst, immL con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Long could be loaded into xmm register directly from memory.
instruct Repl2L_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "movq $dst,$mem\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate long (8 byte) scalar zero to be vector
instruct Repl2L_zero(vecX dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL zero));
  format %{ "pxor $dst,$dst\t! replicate2L zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4L_zero(vecY dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate float (4 byte) scalar to be vector
instruct Repl2F(vecD dst, regF src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\t! replicate2F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl2F_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "pshufd $dst,$mem,0x00\t! replicate2F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4F(vecX dst, regF src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\t! replicate4F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4F_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "pshufd $dst,$mem,0x00\t! replicate4F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate float (4 byte) scalar zero to be vector
instruct Repl2F_zero(vecD dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF zero));
  format %{ "xorps $dst,$dst\t! replicate2F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F_zero(vecX dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF zero));
  format %{ "xorps $dst,$dst\t! replicate4F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8F_zero(vecY dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateF zero));
  format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %}
  ins_encode %{
    int vector_len = 1;
    __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate double (8 bytes) scalar to be vector
instruct Repl2D(vecX dst, regD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\t! replicate2D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl2D_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "pshufd $dst,$mem,0x44\t! replicate2D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate double (8 byte) scalar zero to be vector
instruct Repl2D_zero(vecX dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD zero));
  format %{ "xorpd $dst,$dst\t! replicate2D zero" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4D_zero(vecY dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateD zero));
  format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %}
  ins_encode %{
    int vector_len = 1;
    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// ====================EVEX REPLICATE==========================================

// Note: some of the legacy forms are applicable to EVEX
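// With AVX-512, vpbroadcast{b,w,d,q} can broadcast straight from a GPR or
// from memory in a single instruction, replacing the shuffle sequences
// above. The avx512vl predicates gate the 128/256-bit EVEX forms; byte and
// word broadcasts additionally need avx512bw, hence supports_avx512vlbw().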
instruct Repl16B_evex(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB src));
  format %{ "vpbroadcastb $dst,$src\t! replicate16B" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate16B" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_evex(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB src));
  format %{ "vpbroadcastb $dst,$src\t! replicate32B" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate32B" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_evex(vecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 64 && UseAVX > 2);
  match(Set dst (ReplicateB src));
  format %{ "vpbroadcastb $dst,$src\t! replicate64B" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate64B" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_imm_evex(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastb $dst,$dst\t! replicate16B" %}
  ins_encode %{
    int vector_len = 0;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_imm_evex(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastb $dst,$dst\t! replicate32B" %}
  ins_encode %{
    int vector_len = 1;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_imm_evex(vecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 64 && UseAVX > 2);
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastb $dst,$dst\t! replicate64B" %}
  ins_encode %{
    int vector_len = 2;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_zero_evex(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 64 && UseAVX > 2);
  match(Set dst (ReplicateB zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate64B zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8S_evex(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "vpbroadcastw $dst,$src\t! replicate8S" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw $dst,$mem\t! replicate8S" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_evex(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "vpbroadcastw $dst,$src\t! replicate16S" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw $dst,$mem\t! replicate16S" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_evex(vecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 2);
  match(Set dst (ReplicateS src));
  format %{ "vpbroadcastw $dst,$src\t! replicate32S" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 2);
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw $dst,$mem\t! replicate32S" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_imm_evex(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastw $dst,$dst\t! replicate8S" %}
  ins_encode %{
    int vector_len = 0;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_imm_evex(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastw $dst,$dst\t! replicate16S" %}
  ins_encode %{
    int vector_len = 1;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_imm_evex(vecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 2);
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastw $dst,$dst\t! replicate32S" %}
  ins_encode %{
    int vector_len = 2;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_zero_evex(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 2);
  match(Set dst (ReplicateS zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate32S zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_evex(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI src));
  format %{ "vpbroadcastd $dst,$src\t! replicate4I" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "vpbroadcastd $dst,$mem\t! replicate4I" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_evex(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI src));
  format %{ "vpbroadcastd $dst,$src\t! replicate8I" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "vpbroadcastd $dst,$mem\t! replicate8I" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_evex(vecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateI src));
  format %{ "vpbroadcastd $dst,$src\t! replicate16I" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "vpbroadcastd $dst,$mem\t! replicate16I" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I_imm_evex(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t"
            "vpbroadcastd $dst,$dst\t! replicate4I" %}
  ins_encode %{
    int vector_len = 0;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_imm_evex(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
            "vpbroadcastd $dst,$dst\t! replicate8I" %}
  ins_encode %{
    int vector_len = 1;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_imm_evex(vecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate16I($con)\n\t"
            "vpbroadcastd $dst,$dst\t! replicate16I" %}
  ins_encode %{
    int vector_len = 2;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_zero_evex(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateI zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate16I zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate long (8 byte) scalar to be vector
#ifdef _LP64
instruct Repl4L_evex(vecY dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL src));
  format %{ "vpbroadcastq $dst,$src\t! replicate4L" %}
replicate4L" %} 4094 ins_encode %{ 4095 int vector_len = 1; 4096 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); 4097 %} 4098 ins_pipe( pipe_slow ); 4099 %} 4100 4101 instruct Repl8L_evex(vecZ dst, rRegL src) %{ 4102 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4103 match(Set dst (ReplicateL src)); 4104 format %{ "vpbroadcastq $dst,$src\t! replicate8L" %} 4105 ins_encode %{ 4106 int vector_len = 2; 4107 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); 4108 %} 4109 ins_pipe( pipe_slow ); 4110 %} 4111 #else // _LP64 4112 instruct Repl4L_evex(vecY dst, eRegL src, regD tmp) %{ 4113 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4114 match(Set dst (ReplicateL src)); 4115 effect(TEMP dst, USE src, TEMP tmp); 4116 format %{ "movdl $dst,$src.lo\n\t" 4117 "movdl $tmp,$src.hi\n\t" 4118 "punpckldq $dst,$tmp\n\t" 4119 "vpbroadcastq $dst,$dst\t! replicate4L" %} 4120 ins_encode %{ 4121 int vector_len = 1; 4122 __ movdl($dst$$XMMRegister, $src$$Register); 4123 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4124 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4125 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4126 %} 4127 ins_pipe( pipe_slow ); 4128 %} 4129 4130 instruct Repl8L_evex(vecZ dst, eRegL src, regD tmp) %{ 4131 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4132 match(Set dst (ReplicateL src)); 4133 effect(TEMP dst, USE src, TEMP tmp); 4134 format %{ "movdl $dst,$src.lo\n\t" 4135 "movdl $tmp,$src.hi\n\t" 4136 "punpckldq $dst,$tmp\n\t" 4137 "vpbroadcastq $dst,$dst\t! replicate8L" %} 4138 ins_encode %{ 4139 int vector_len = 2; 4140 __ movdl($dst$$XMMRegister, $src$$Register); 4141 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4142 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4143 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4144 %} 4145 ins_pipe( pipe_slow ); 4146 %} 4147 #endif // _LP64 4148 4149 instruct Repl4L_imm_evex(vecY dst, immL con) %{ 4150 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4151 match(Set dst (ReplicateL con)); 4152 format %{ "movq $dst,[$constantaddress]\n\t" 4153 "vpbroadcastq $dst,$dst\t! replicate4L" %} 4154 ins_encode %{ 4155 int vector_len = 1; 4156 __ movq($dst$$XMMRegister, $constantaddress($con)); 4157 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4158 %} 4159 ins_pipe( pipe_slow ); 4160 %} 4161 4162 instruct Repl8L_imm_evex(vecZ dst, immL con) %{ 4163 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4164 match(Set dst (ReplicateL con)); 4165 format %{ "movq $dst,[$constantaddress]\n\t" 4166 "vpbroadcastq $dst,$dst\t! replicate8L" %} 4167 ins_encode %{ 4168 int vector_len = 2; 4169 __ movq($dst$$XMMRegister, $constantaddress($con)); 4170 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4171 %} 4172 ins_pipe( pipe_slow ); 4173 %} 4174 4175 instruct Repl4L_mem_evex(vecY dst, memory mem) %{ 4176 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4177 match(Set dst (ReplicateL (LoadL mem))); 4178 format %{ "vpbroadcastd $dst,$mem\t! 
replicate4L" %} 4179 ins_encode %{ 4180 int vector_len = 1; 4181 __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4182 %} 4183 ins_pipe( pipe_slow ); 4184 %} 4185 4186 instruct Repl8L_mem_evex(vecZ dst, memory mem) %{ 4187 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4188 match(Set dst (ReplicateL (LoadL mem))); 4189 format %{ "vpbroadcastd $dst,$mem\t! replicate8L" %} 4190 ins_encode %{ 4191 int vector_len = 2; 4192 __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4193 %} 4194 ins_pipe( pipe_slow ); 4195 %} 4196 4197 instruct Repl8L_zero_evex(vecZ dst, immL0 zero) %{ 4198 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4199 match(Set dst (ReplicateL zero)); 4200 format %{ "vpxor $dst k0,$dst,$dst\t! replicate8L zero" %} 4201 ins_encode %{ 4202 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 4203 int vector_len = 2; 4204 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4205 %} 4206 ins_pipe( fpu_reg_reg ); 4207 %} 4208 4209 instruct Repl8F_evex(vecY dst, regF src) %{ 4210 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4211 match(Set dst (ReplicateF src)); 4212 format %{ "vbroadcastss $dst,$src\t! replicate8F" %} 4213 ins_encode %{ 4214 int vector_len = 1; 4215 __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4216 %} 4217 ins_pipe( pipe_slow ); 4218 %} 4219 4220 instruct Repl8F_mem_evex(vecY dst, memory mem) %{ 4221 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4222 match(Set dst (ReplicateF (LoadF mem))); 4223 format %{ "vbroadcastss $dst,$mem\t! replicate8F" %} 4224 ins_encode %{ 4225 int vector_len = 1; 4226 __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); 4227 %} 4228 ins_pipe( pipe_slow ); 4229 %} 4230 4231 instruct Repl16F_evex(vecZ dst, regF src) %{ 4232 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4233 match(Set dst (ReplicateF src)); 4234 format %{ "vbroadcastss $dst,$src\t! replicate16F" %} 4235 ins_encode %{ 4236 int vector_len = 2; 4237 __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4238 %} 4239 ins_pipe( pipe_slow ); 4240 %} 4241 4242 instruct Repl16F_mem_evex(vecZ dst, memory mem) %{ 4243 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4244 match(Set dst (ReplicateF (LoadF mem))); 4245 format %{ "vbroadcastss $dst,$mem\t! replicate16F" %} 4246 ins_encode %{ 4247 int vector_len = 2; 4248 __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); 4249 %} 4250 ins_pipe( pipe_slow ); 4251 %} 4252 4253 instruct Repl16F_zero_evex(vecZ dst, immF0 zero) %{ 4254 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4255 match(Set dst (ReplicateF zero)); 4256 format %{ "vxorps $dst k0,$dst,$dst\t! replicate16F zero" %} 4257 ins_encode %{ 4258 int vector_len = 2; 4259 __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4260 %} 4261 ins_pipe( fpu_reg_reg ); 4262 %} 4263 4264 instruct Repl4D_evex(vecY dst, regD src) %{ 4265 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4266 match(Set dst (ReplicateD src)); 4267 format %{ "vbroadcastsd $dst,$src\t! 
replicate4D" %} 4268 ins_encode %{ 4269 int vector_len = 1; 4270 __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4271 %} 4272 ins_pipe( pipe_slow ); 4273 %} 4274 4275 instruct Repl4D_mem_evex(vecY dst, memory mem) %{ 4276 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4277 match(Set dst (ReplicateD (LoadD mem))); 4278 format %{ "vbroadcastsd $dst,$mem\t! replicate4D" %} 4279 ins_encode %{ 4280 int vector_len = 1; 4281 __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); 4282 %} 4283 ins_pipe( pipe_slow ); 4284 %} 4285 4286 instruct Repl8D_evex(vecZ dst, regD src) %{ 4287 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4288 match(Set dst (ReplicateD src)); 4289 format %{ "vbroadcastsd $dst,$src\t! replicate8D" %} 4290 ins_encode %{ 4291 int vector_len = 2; 4292 __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4293 %} 4294 ins_pipe( pipe_slow ); 4295 %} 4296 4297 instruct Repl8D_mem_evex(vecZ dst, memory mem) %{ 4298 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4299 match(Set dst (ReplicateD (LoadD mem))); 4300 format %{ "vbroadcastsd $dst,$mem\t! replicate8D" %} 4301 ins_encode %{ 4302 int vector_len = 2; 4303 __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); 4304 %} 4305 ins_pipe( pipe_slow ); 4306 %} 4307 4308 instruct Repl8D_zero_evex(vecZ dst, immD0 zero) %{ 4309 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4310 match(Set dst (ReplicateD zero)); 4311 format %{ "vxorpd $dst k0,$dst,$dst,vect512\t! replicate8D zero" %} 4312 ins_encode %{ 4313 int vector_len = 2; 4314 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4315 %} 4316 ins_pipe( fpu_reg_reg ); 4317 %} 4318 4319 // ====================REDUCTION ARITHMETIC======================================= 4320 4321 instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4322 predicate(UseSSE > 2 && UseAVX == 0); 4323 match(Set dst (AddReductionVI src1 src2)); 4324 effect(TEMP tmp2, TEMP tmp); 4325 format %{ "movdqu $tmp2,$src2\n\t" 4326 "phaddd $tmp2,$tmp2\n\t" 4327 "movd $tmp,$src1\n\t" 4328 "paddd $tmp,$tmp2\n\t" 4329 "movd $dst,$tmp\t! add reduction2I" %} 4330 ins_encode %{ 4331 __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister); 4332 __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); 4333 __ movdl($tmp$$XMMRegister, $src1$$Register); 4334 __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister); 4335 __ movdl($dst$$Register, $tmp$$XMMRegister); 4336 %} 4337 ins_pipe( pipe_slow ); 4338 %} 4339 4340 instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4341 predicate(UseAVX > 0 && UseAVX < 3); 4342 match(Set dst (AddReductionVI src1 src2)); 4343 effect(TEMP tmp, TEMP tmp2); 4344 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4345 "movd $tmp2,$src1\n\t" 4346 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4347 "movd $dst,$tmp2\t! 
add reduction2I" %} 4348 ins_encode %{ 4349 int vector_len = 0; 4350 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4351 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4352 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 4353 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4354 %} 4355 ins_pipe( pipe_slow ); 4356 %} 4357 4358 instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4359 predicate(UseAVX > 2); 4360 match(Set dst (AddReductionVI src1 src2)); 4361 effect(TEMP tmp, TEMP tmp2); 4362 format %{ "pshufd $tmp2,$src2,0x1\n\t" 4363 "vpaddd $tmp,$src2,$tmp2\n\t" 4364 "movd $tmp2,$src1\n\t" 4365 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4366 "movd $dst,$tmp2\t! add reduction2I" %} 4367 ins_encode %{ 4368 int vector_len = 0; 4369 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 4370 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4371 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4372 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4373 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4374 %} 4375 ins_pipe( pipe_slow ); 4376 %} 4377 4378 instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4379 predicate(UseSSE > 2 && UseAVX == 0); 4380 match(Set dst (AddReductionVI src1 src2)); 4381 effect(TEMP tmp2, TEMP tmp); 4382 format %{ "movdqu $tmp2,$src2\n\t" 4383 "phaddd $tmp2,$tmp2\n\t" 4384 "phaddd $tmp2,$tmp2\n\t" 4385 "movd $tmp,$src1\n\t" 4386 "paddd $tmp,$tmp2\n\t" 4387 "movd $dst,$tmp\t! add reduction4I" %} 4388 ins_encode %{ 4389 __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister); 4390 __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); 4391 __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); 4392 __ movdl($tmp$$XMMRegister, $src1$$Register); 4393 __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister); 4394 __ movdl($dst$$Register, $tmp$$XMMRegister); 4395 %} 4396 ins_pipe( pipe_slow ); 4397 %} 4398 4399 instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4400 predicate(UseAVX > 0 && UseAVX < 3); 4401 match(Set dst (AddReductionVI src1 src2)); 4402 effect(TEMP tmp, TEMP tmp2); 4403 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4404 "vphaddd $tmp,$tmp,$tmp2\n\t" 4405 "movd $tmp2,$src1\n\t" 4406 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4407 "movd $dst,$tmp2\t! add reduction4I" %} 4408 ins_encode %{ 4409 int vector_len = 0; 4410 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4411 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4412 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4413 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 4414 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4415 %} 4416 ins_pipe( pipe_slow ); 4417 %} 4418 4419 instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4420 predicate(UseAVX > 2); 4421 match(Set dst (AddReductionVI src1 src2)); 4422 effect(TEMP tmp, TEMP tmp2); 4423 format %{ "pshufd $tmp2,$src2,0xE\n\t" 4424 "vpaddd $tmp,$src2,$tmp2\n\t" 4425 "pshufd $tmp2,$tmp,0x1\n\t" 4426 "vpaddd $tmp,$tmp,$tmp2\n\t" 4427 "movd $tmp2,$src1\n\t" 4428 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4429 "movd $dst,$tmp2\t! 
add reduction4I" %} 4430 ins_encode %{ 4431 int vector_len = 0; 4432 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 4433 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4434 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4435 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4436 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4437 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4438 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4439 %} 4440 ins_pipe( pipe_slow ); 4441 %} 4442 4443 instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 4444 predicate(UseAVX > 0 && UseAVX < 3); 4445 match(Set dst (AddReductionVI src1 src2)); 4446 effect(TEMP tmp, TEMP tmp2); 4447 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4448 "vphaddd $tmp,$tmp,$tmp2\n\t" 4449 "vextracti128 $tmp2,$tmp\n\t" 4450 "vpaddd $tmp,$tmp,$tmp2\n\t" 4451 "movd $tmp2,$src1\n\t" 4452 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4453 "movd $dst,$tmp2\t! add reduction8I" %} 4454 ins_encode %{ 4455 int vector_len = 1; 4456 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4457 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4458 __ vextracti128h($tmp2$$XMMRegister, $tmp$$XMMRegister); 4459 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4460 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4461 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4462 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4463 %} 4464 ins_pipe( pipe_slow ); 4465 %} 4466 4467 instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 4468 predicate(UseAVX > 2); 4469 match(Set dst (AddReductionVI src1 src2)); 4470 effect(TEMP tmp, TEMP tmp2); 4471 format %{ "vextracti128 $tmp,$src2\n\t" 4472 "vpaddd $tmp,$tmp,$src2\n\t" 4473 "pshufd $tmp2,$tmp,0xE\n\t" 4474 "vpaddd $tmp,$tmp,$tmp2\n\t" 4475 "pshufd $tmp2,$tmp,0x1\n\t" 4476 "vpaddd $tmp,$tmp,$tmp2\n\t" 4477 "movd $tmp2,$src1\n\t" 4478 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4479 "movd $dst,$tmp2\t! add reduction8I" %} 4480 ins_encode %{ 4481 int vector_len = 0; 4482 __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister); 4483 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 4484 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 4485 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4486 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4487 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4488 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4489 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4490 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4491 %} 4492 ins_pipe( pipe_slow ); 4493 %} 4494 4495 instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 4496 predicate(UseAVX > 2); 4497 match(Set dst (AddReductionVI src1 src2)); 4498 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 4499 format %{ "vextracti64x4 $tmp3,$src2\n\t" 4500 "vpaddd $tmp3,$tmp3,$src2\n\t" 4501 "vextracti128 $tmp,$tmp3\n\t" 4502 "vpaddd $tmp,$tmp,$tmp3\n\t" 4503 "pshufd $tmp2,$tmp,0xE\n\t" 4504 "vpaddd $tmp,$tmp,$tmp2\n\t" 4505 "pshufd $tmp2,$tmp,0x1\n\t" 4506 "vpaddd $tmp,$tmp,$tmp2\n\t" 4507 "movd $tmp2,$src1\n\t" 4508 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4509 "movd $dst,$tmp2\t! 
mul reduction16I" %} 4510 ins_encode %{ 4511 __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister); 4512 __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 4513 __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister); 4514 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 4515 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 4516 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4517 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4518 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4519 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4520 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4521 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4522 %} 4523 ins_pipe( pipe_slow ); 4524 %} 4525 4526 #ifdef _LP64 4527 instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ 4528 predicate(UseAVX > 2); 4529 match(Set dst (AddReductionVL src1 src2)); 4530 effect(TEMP tmp, TEMP tmp2); 4531 format %{ "pshufd $tmp2,$src2,0xE\n\t" 4532 "vpaddq $tmp,$src2,$tmp2\n\t" 4533 "movdq $tmp2,$src1\n\t" 4534 "vpaddq $tmp2,$tmp,$tmp2\n\t" 4535 "movdq $dst,$tmp2\t! add reduction2L" %} 4536 ins_encode %{ 4537 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 4538 __ vpaddq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 4539 __ movdq($tmp2$$XMMRegister, $src1$$Register); 4540 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4541 __ movdq($dst$$Register, $tmp2$$XMMRegister); 4542 %} 4543 ins_pipe( pipe_slow ); 4544 %} 4545 4546 instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ 4547 predicate(UseAVX > 2); 4548 match(Set dst (AddReductionVL src1 src2)); 4549 effect(TEMP tmp, TEMP tmp2); 4550 format %{ "vextracti64x2 $tmp,$src2, 0x1\n\t" 4551 "vpaddq $tmp2,$tmp,$src2\n\t" 4552 "pshufd $tmp,$tmp2,0xE\n\t" 4553 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4554 "movdq $tmp,$src1\n\t" 4555 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4556 "movdq $dst,$tmp2\t! add reduction4L" %} 4557 ins_encode %{ 4558 __ vextracti64x2h($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); 4559 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 4560 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 4561 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4562 __ movdq($tmp$$XMMRegister, $src1$$Register); 4563 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4564 __ movdq($dst$$Register, $tmp2$$XMMRegister); 4565 %} 4566 ins_pipe( pipe_slow ); 4567 %} 4568 4569 instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ 4570 predicate(UseAVX > 2); 4571 match(Set dst (AddReductionVL src1 src2)); 4572 effect(TEMP tmp, TEMP tmp2); 4573 format %{ "vextracti64x4 $tmp2,$src2\n\t" 4574 "vpaddq $tmp2,$tmp2,$src2\n\t" 4575 "vextracti128 $tmp,$tmp2\n\t" 4576 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4577 "pshufd $tmp,$tmp2,0xE\n\t" 4578 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4579 "movdq $tmp,$src1\n\t" 4580 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4581 "movdq $dst,$tmp2\t! 
add reduction8L" %} 4582 ins_encode %{ 4583 __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister); 4584 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 4585 __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister); 4586 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4587 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 4588 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4589 __ movdq($tmp$$XMMRegister, $src1$$Register); 4590 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4591 __ movdq($dst$$Register, $tmp2$$XMMRegister); 4592 %} 4593 ins_pipe( pipe_slow ); 4594 %} 4595 #endif 4596 4597 instruct rsadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{ 4598 predicate(UseSSE >= 1 && UseAVX == 0); 4599 match(Set dst (AddReductionVF src1 src2)); 4600 effect(TEMP tmp, TEMP tmp2); 4601 format %{ "movdqu $tmp,$src1\n\t" 4602 "addss $tmp,$src2\n\t" 4603 "pshufd $tmp2,$src2,0x01\n\t" 4604 "addss $tmp,$tmp2\n\t" 4605 "movdqu $dst,$tmp\t! add reduction2F" %} 4606 ins_encode %{ 4607 __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); 4608 __ addss($tmp$$XMMRegister, $src2$$XMMRegister); 4609 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01); 4610 __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister); 4611 __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister); 4612 %} 4613 ins_pipe( pipe_slow ); 4614 %} 4615 4616 instruct rvadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{ 4617 predicate(UseAVX > 0); 4618 match(Set dst (AddReductionVF src1 src2)); 4619 effect(TEMP tmp2, TEMP tmp); 4620 format %{ "vaddss $tmp2,$src1,$src2\n\t" 4621 "pshufd $tmp,$src2,0x01\n\t" 4622 "vaddss $dst,$tmp2,$tmp\t! add reduction2F" %} 4623 ins_encode %{ 4624 __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 4625 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4626 __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4627 %} 4628 ins_pipe( pipe_slow ); 4629 %} 4630 4631 instruct rsadd4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{ 4632 predicate(UseSSE >= 1 && UseAVX == 0); 4633 match(Set dst (AddReductionVF src1 src2)); 4634 effect(TEMP tmp, TEMP tmp2); 4635 format %{ "movdqu $tmp,$src1\n\t" 4636 "addss $tmp,$src2\n\t" 4637 "pshufd $tmp2,$src2,0x01\n\t" 4638 "addss $tmp,$tmp2\n\t" 4639 "pshufd $tmp2,$src2,0x02\n\t" 4640 "addss $tmp,$tmp2\n\t" 4641 "pshufd $tmp2,$src2,0x03\n\t" 4642 "addss $tmp,$tmp2\n\t" 4643 "movdqu $dst,$tmp\t! 
add reduction4F" %} 4644 ins_encode %{ 4645 __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); 4646 __ addss($tmp$$XMMRegister, $src2$$XMMRegister); 4647 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01); 4648 __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister); 4649 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02); 4650 __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister); 4651 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03); 4652 __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister); 4653 __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister); 4654 %} 4655 ins_pipe( pipe_slow ); 4656 %} 4657 4658 instruct rvadd4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{ 4659 predicate(UseAVX > 0); 4660 match(Set dst (AddReductionVF src1 src2)); 4661 effect(TEMP tmp, TEMP tmp2); 4662 format %{ "vaddss $tmp2,$src1,$src2\n\t" 4663 "pshufd $tmp,$src2,0x01\n\t" 4664 "vaddss $tmp2,$tmp2,$tmp\n\t" 4665 "pshufd $tmp,$src2,0x02\n\t" 4666 "vaddss $tmp2,$tmp2,$tmp\n\t" 4667 "pshufd $tmp,$src2,0x03\n\t" 4668 "vaddss $dst,$tmp2,$tmp\t! add reduction4F" %} 4669 ins_encode %{ 4670 __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 4671 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4672 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4673 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 4674 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4675 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 4676 __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4677 %} 4678 ins_pipe( pipe_slow ); 4679 %} 4680 4681 instruct radd8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{ 4682 predicate(UseAVX > 0); 4683 match(Set dst (AddReductionVF src1 src2)); 4684 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 4685 format %{ "vaddss $tmp2,$src1,$src2\n\t" 4686 "pshufd $tmp,$src2,0x01\n\t" 4687 "vaddss $tmp2,$tmp2,$tmp\n\t" 4688 "pshufd $tmp,$src2,0x02\n\t" 4689 "vaddss $tmp2,$tmp2,$tmp\n\t" 4690 "pshufd $tmp,$src2,0x03\n\t" 4691 "vaddss $tmp2,$tmp2,$tmp\n\t" 4692 "vextractf128 $tmp3,$src2\n\t" 4693 "vaddss $tmp2,$tmp2,$tmp3\n\t" 4694 "pshufd $tmp,$tmp3,0x01\n\t" 4695 "vaddss $tmp2,$tmp2,$tmp\n\t" 4696 "pshufd $tmp,$tmp3,0x02\n\t" 4697 "vaddss $tmp2,$tmp2,$tmp\n\t" 4698 "pshufd $tmp,$tmp3,0x03\n\t" 4699 "vaddss $dst,$tmp2,$tmp\t! 
add reduction8F" %} 4700 ins_encode %{ 4701 __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 4702 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4703 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4704 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 4705 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4706 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 4707 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4708 __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister); 4709 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 4710 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); 4711 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4712 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); 4713 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4714 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); 4715 __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4716 %} 4717 ins_pipe( pipe_slow ); 4718 %} 4719 4720 instruct radd16F_reduction_reg(regF dst, regF src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 4721 predicate(UseAVX > 2); 4722 match(Set dst (AddReductionVF src1 src2)); 4723 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 4724 format %{ "vaddss $tmp2,$src1,$src2\n\t" 4725 "pshufd $tmp,$src2,0x01\n\t" 4726 "vaddss $tmp2,$tmp2,$tmp\n\t" 4727 "pshufd $tmp,$src2,0x02\n\t" 4728 "vaddss $tmp2,$tmp2,$tmp\n\t" 4729 "pshufd $tmp,$src2,0x03\n\t" 4730 "vaddss $tmp2,$tmp2,$tmp\n\t" 4731 "vextractf64x2 $tmp3,$src2, 0x1\n\t" 4732 "vaddss $tmp2,$tmp2,$tmp3\n\t" 4733 "pshufd $tmp,$tmp3,0x01\n\t" 4734 "vaddss $tmp2,$tmp2,$tmp\n\t" 4735 "pshufd $tmp,$tmp3,0x02\n\t" 4736 "vaddss $tmp2,$tmp2,$tmp\n\t" 4737 "pshufd $tmp,$tmp3,0x03\n\t" 4738 "vaddss $tmp2,$tmp2,$tmp\n\t" 4739 "vextractf64x2 $tmp3,$src2, 0x2\n\t" 4740 "vaddss $tmp2,$tmp2,$tmp3\n\t" 4741 "pshufd $tmp,$tmp3,0x01\n\t" 4742 "vaddss $tmp2,$tmp2,$tmp\n\t" 4743 "pshufd $tmp,$tmp3,0x02\n\t" 4744 "vaddss $tmp2,$tmp2,$tmp\n\t" 4745 "pshufd $tmp,$tmp3,0x03\n\t" 4746 "vaddss $tmp2,$tmp2,$tmp\n\t" 4747 "vextractf64x2 $tmp3,$src2, 0x3\n\t" 4748 "vaddss $tmp2,$tmp2,$tmp3\n\t" 4749 "pshufd $tmp,$tmp3,0x01\n\t" 4750 "vaddss $tmp2,$tmp2,$tmp\n\t" 4751 "pshufd $tmp,$tmp3,0x02\n\t" 4752 "vaddss $tmp2,$tmp2,$tmp\n\t" 4753 "pshufd $tmp,$tmp3,0x03\n\t" 4754 "vaddss $dst,$tmp2,$tmp\t! 
add reduction16F" %} 4755 ins_encode %{ 4756 __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 4757 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4758 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4759 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 4760 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4761 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 4762 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4763 __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1); 4764 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 4765 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); 4766 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4767 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); 4768 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4769 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); 4770 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4771 __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2); 4772 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 4773 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); 4774 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4775 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); 4776 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4777 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); 4778 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4779 __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3); 4780 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 4781 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); 4782 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4783 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); 4784 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4785 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); 4786 __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4787 %} 4788 ins_pipe( pipe_slow ); 4789 %} 4790 4791 instruct rsadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{ 4792 predicate(UseSSE >= 1 && UseAVX == 0); 4793 match(Set dst (AddReductionVD src1 src2)); 4794 effect(TEMP tmp, TEMP dst); 4795 format %{ "movdqu $tmp,$src1\n\t" 4796 "addsd $tmp,$src2\n\t" 4797 "pshufd $dst,$src2,0xE\n\t" 4798 "addsd $dst,$tmp\t! add reduction2D" %} 4799 ins_encode %{ 4800 __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); 4801 __ addsd($tmp$$XMMRegister, $src2$$XMMRegister); 4802 __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE); 4803 __ addsd($dst$$XMMRegister, $tmp$$XMMRegister); 4804 %} 4805 ins_pipe( pipe_slow ); 4806 %} 4807 4808 instruct rvadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{ 4809 predicate(UseAVX > 0); 4810 match(Set dst (AddReductionVD src1 src2)); 4811 effect(TEMP tmp, TEMP tmp2); 4812 format %{ "vaddsd $tmp2,$src1,$src2\n\t" 4813 "pshufd $tmp,$src2,0xE\n\t" 4814 "vaddsd $dst,$tmp2,$tmp\t! 
add reduction2D" %} 4815 ins_encode %{ 4816 __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 4817 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 4818 __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4819 %} 4820 ins_pipe( pipe_slow ); 4821 %} 4822 4823 instruct rvadd4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{ 4824 predicate(UseAVX > 0); 4825 match(Set dst (AddReductionVD src1 src2)); 4826 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 4827 format %{ "vaddsd $tmp2,$src1,$src2\n\t" 4828 "pshufd $tmp,$src2,0xE\n\t" 4829 "vaddsd $tmp2,$tmp2,$tmp\n\t" 4830 "vextractf128 $tmp3,$src2\n\t" 4831 "vaddsd $tmp2,$tmp2,$tmp3\n\t" 4832 "pshufd $tmp,$tmp3,0xE\n\t" 4833 "vaddsd $dst,$tmp2,$tmp\t! add reduction4D" %} 4834 ins_encode %{ 4835 __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 4836 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 4837 __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4838 __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister); 4839 __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 4840 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); 4841 __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4842 %} 4843 ins_pipe( pipe_slow ); 4844 %} 4845 4846 instruct rvadd8D_reduction_reg(regD dst, regD src1, vecZ src2, regD tmp, regD tmp2, regD tmp3) %{ 4847 predicate(UseAVX > 2); 4848 match(Set dst (AddReductionVD src1 src2)); 4849 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 4850 format %{ "vaddsd $tmp2,$src1,$src2\n\t" 4851 "pshufd $tmp,$src2,0xE\n\t" 4852 "vaddsd $tmp2,$tmp2,$tmp\n\t" 4853 "vextractf64x2 $tmp3,$src2, 0x1\n\t" 4854 "vaddsd $tmp2,$tmp2,$tmp3\n\t" 4855 "pshufd $tmp,$tmp3,0xE\n\t" 4856 "vaddsd $tmp2,$tmp2,$tmp\n\t" 4857 "vextractf64x2 $tmp3,$src2, 0x2\n\t" 4858 "vaddsd $tmp2,$tmp2,$tmp3\n\t" 4859 "pshufd $tmp,$tmp3,0xE\n\t" 4860 "vaddsd $tmp2,$tmp2,$tmp\n\t" 4861 "vextractf64x2 $tmp3,$src2, 0x3\n\t" 4862 "vaddsd $tmp2,$tmp2,$tmp3\n\t" 4863 "pshufd $tmp,$tmp3,0xE\n\t" 4864 "vaddsd $dst,$tmp2,$tmp\t! 
add reduction8D" %} 4865 ins_encode %{ 4866 __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 4867 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 4868 __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4869 __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1); 4870 __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 4871 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); 4872 __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4873 __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2); 4874 __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 4875 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); 4876 __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4877 __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3); 4878 __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 4879 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); 4880 __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4881 %} 4882 ins_pipe( pipe_slow ); 4883 %} 4884 4885 instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4886 predicate(UseSSE > 3 && UseAVX == 0); 4887 match(Set dst (MulReductionVI src1 src2)); 4888 effect(TEMP tmp, TEMP tmp2); 4889 format %{ "pshufd $tmp2,$src2,0x1\n\t" 4890 "pmulld $tmp2,$src2\n\t" 4891 "movd $tmp,$src1\n\t" 4892 "pmulld $tmp2,$tmp\n\t" 4893 "movd $dst,$tmp2\t! mul reduction2I" %} 4894 ins_encode %{ 4895 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 4896 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 4897 __ movdl($tmp$$XMMRegister, $src1$$Register); 4898 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 4899 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4900 %} 4901 ins_pipe( pipe_slow ); 4902 %} 4903 4904 instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4905 predicate(UseAVX > 0); 4906 match(Set dst (MulReductionVI src1 src2)); 4907 effect(TEMP tmp, TEMP tmp2); 4908 format %{ "pshufd $tmp2,$src2,0x1\n\t" 4909 "vpmulld $tmp,$src2,$tmp2\n\t" 4910 "movd $tmp2,$src1\n\t" 4911 "vpmulld $tmp2,$tmp,$tmp2\n\t" 4912 "movd $dst,$tmp2\t! mul reduction2I" %} 4913 ins_encode %{ 4914 int vector_len = 0; 4915 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 4916 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4917 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4918 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4919 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4920 %} 4921 ins_pipe( pipe_slow ); 4922 %} 4923 4924 instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4925 predicate(UseSSE > 3 && UseAVX == 0); 4926 match(Set dst (MulReductionVI src1 src2)); 4927 effect(TEMP tmp, TEMP tmp2); 4928 format %{ "pshufd $tmp2,$src2,0xE\n\t" 4929 "pmulld $tmp2,$src2\n\t" 4930 "pshufd $tmp,$tmp2,0x1\n\t" 4931 "pmulld $tmp2,$tmp\n\t" 4932 "movd $tmp,$src1\n\t" 4933 "pmulld $tmp2,$tmp\n\t" 4934 "movd $dst,$tmp2\t! 
mul reduction4I" %} 4935 ins_encode %{ 4936 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 4937 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 4938 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1); 4939 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 4940 __ movdl($tmp$$XMMRegister, $src1$$Register); 4941 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 4942 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4943 %} 4944 ins_pipe( pipe_slow ); 4945 %} 4946 4947 instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4948 predicate(UseAVX > 0); 4949 match(Set dst (MulReductionVI src1 src2)); 4950 effect(TEMP tmp, TEMP tmp2); 4951 format %{ "pshufd $tmp2,$src2,0xE\n\t" 4952 "vpmulld $tmp,$src2,$tmp2\n\t" 4953 "pshufd $tmp2,$tmp,0x1\n\t" 4954 "vpmulld $tmp,$tmp,$tmp2\n\t" 4955 "movd $tmp2,$src1\n\t" 4956 "vpmulld $tmp2,$tmp,$tmp2\n\t" 4957 "movd $dst,$tmp2\t! mul reduction4I" %} 4958 ins_encode %{ 4959 int vector_len = 0; 4960 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 4961 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4962 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4963 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4964 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4965 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4966 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4967 %} 4968 ins_pipe( pipe_slow ); 4969 %} 4970 4971 instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 4972 predicate(UseAVX > 0); 4973 match(Set dst (MulReductionVI src1 src2)); 4974 effect(TEMP tmp, TEMP tmp2); 4975 format %{ "vextracti128 $tmp,$src2\n\t" 4976 "vpmulld $tmp,$tmp,$src2\n\t" 4977 "pshufd $tmp2,$tmp,0xE\n\t" 4978 "vpmulld $tmp,$tmp,$tmp2\n\t" 4979 "pshufd $tmp2,$tmp,0x1\n\t" 4980 "vpmulld $tmp,$tmp,$tmp2\n\t" 4981 "movd $tmp2,$src1\n\t" 4982 "vpmulld $tmp2,$tmp,$tmp2\n\t" 4983 "movd $dst,$tmp2\t! mul reduction8I" %} 4984 ins_encode %{ 4985 int vector_len = 0; 4986 __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister); 4987 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 4988 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 4989 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4990 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4991 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4992 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4993 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4994 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4995 %} 4996 ins_pipe( pipe_slow ); 4997 %} 4998 4999 instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 5000 predicate(UseAVX > 2); 5001 match(Set dst (MulReductionVI src1 src2)); 5002 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5003 format %{ "vextracti64x4 $tmp3,$src2\n\t" 5004 "vpmulld $tmp3,$tmp3,$src2\n\t" 5005 "vextracti128 $tmp,$tmp3\n\t" 5006 "vpmulld $tmp,$tmp,$src2\n\t" 5007 "pshufd $tmp2,$tmp,0xE\n\t" 5008 "vpmulld $tmp,$tmp,$tmp2\n\t" 5009 "pshufd $tmp2,$tmp,0x1\n\t" 5010 "vpmulld $tmp,$tmp,$tmp2\n\t" 5011 "movd $tmp2,$src1\n\t" 5012 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5013 "movd $dst,$tmp2\t! 
mul reduction16I" %} 5014 ins_encode %{ 5015 __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister); 5016 __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 5017 __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister); 5018 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 5019 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5020 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5021 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5022 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5023 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5024 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5025 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5026 %} 5027 ins_pipe( pipe_slow ); 5028 %} 5029 5030 #ifdef _LP64 5031 instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ 5032 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5033 match(Set dst (MulReductionVL src1 src2)); 5034 effect(TEMP tmp, TEMP tmp2); 5035 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5036 "vpmullq $tmp,$src2,$tmp2\n\t" 5037 "movdq $tmp2,$src1\n\t" 5038 "vpmullq $tmp2,$tmp,$tmp2\n\t" 5039 "movdq $dst,$tmp2\t! mul reduction2L" %} 5040 ins_encode %{ 5041 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5042 __ vpmullq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 5043 __ movdq($tmp2$$XMMRegister, $src1$$Register); 5044 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5045 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5046 %} 5047 ins_pipe( pipe_slow ); 5048 %} 5049 5050 instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ 5051 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5052 match(Set dst (MulReductionVL src1 src2)); 5053 effect(TEMP tmp, TEMP tmp2); 5054 format %{ "vextracti64x2 $tmp,$src2, 0x1\n\t" 5055 "vpmullq $tmp2,$tmp,$src2\n\t" 5056 "pshufd $tmp,$tmp2,0xE\n\t" 5057 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5058 "movdq $tmp,$src1\n\t" 5059 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5060 "movdq $dst,$tmp2\t! mul reduction4L" %} 5061 ins_encode %{ 5062 __ vextracti64x2h($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); 5063 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 5064 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5065 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5066 __ movdq($tmp$$XMMRegister, $src1$$Register); 5067 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5068 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5069 %} 5070 ins_pipe( pipe_slow ); 5071 %} 5072 5073 instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ 5074 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5075 match(Set dst (MulReductionVL src1 src2)); 5076 effect(TEMP tmp, TEMP tmp2); 5077 format %{ "vextracti64x4 $tmp2,$src2\n\t" 5078 "vpmullq $tmp2,$tmp2,$src2\n\t" 5079 "vextracti128 $tmp,$tmp2\n\t" 5080 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5081 "pshufd $tmp,$tmp2,0xE\n\t" 5082 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5083 "movdq $tmp,$src1\n\t" 5084 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5085 "movdq $dst,$tmp2\t! 
mul reduction8L" %} 5086 ins_encode %{ 5087 __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister); 5088 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 5089 __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister); 5090 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5091 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5092 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5093 __ movdq($tmp$$XMMRegister, $src1$$Register); 5094 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5095 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5096 %} 5097 ins_pipe( pipe_slow ); 5098 %} 5099 #endif 5100 5101 instruct rsmul2F_reduction(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{ 5102 predicate(UseSSE >= 1 && UseAVX == 0); 5103 match(Set dst (MulReductionVF src1 src2)); 5104 effect(TEMP tmp, TEMP tmp2); 5105 format %{ "movdqu $tmp,$src1\n\t" 5106 "mulss $tmp,$src2\n\t" 5107 "pshufd $tmp2,$src2,0x01\n\t" 5108 "mulss $tmp,$tmp2\n\t" 5109 "movdqu $dst,$tmp\t! mul reduction2F" %} 5110 ins_encode %{ 5111 __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); 5112 __ mulss($tmp$$XMMRegister, $src2$$XMMRegister); 5113 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01); 5114 __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister); 5115 __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister); 5116 %} 5117 ins_pipe( pipe_slow ); 5118 %} 5119 5120 instruct rvmul2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{ 5121 predicate(UseAVX > 0); 5122 match(Set dst (MulReductionVF src1 src2)); 5123 effect(TEMP tmp, TEMP tmp2); 5124 format %{ "vmulss $tmp2,$src1,$src2\n\t" 5125 "pshufd $tmp,$src2,0x01\n\t" 5126 "vmulss $dst,$tmp2,$tmp\t! mul reduction2F" %} 5127 ins_encode %{ 5128 __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 5129 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5130 __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5131 %} 5132 ins_pipe( pipe_slow ); 5133 %} 5134 5135 instruct rsmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{ 5136 predicate(UseSSE >= 1 && UseAVX == 0); 5137 match(Set dst (MulReductionVF src1 src2)); 5138 effect(TEMP tmp, TEMP tmp2); 5139 format %{ "movdqu $tmp,$src1\n\t" 5140 "mulss $tmp,$src2\n\t" 5141 "pshufd $tmp2,$src2,0x01\n\t" 5142 "mulss $tmp,$tmp2\n\t" 5143 "pshufd $tmp2,$src2,0x02\n\t" 5144 "mulss $tmp,$tmp2\n\t" 5145 "pshufd $tmp2,$src2,0x03\n\t" 5146 "mulss $tmp,$tmp2\n\t" 5147 "movdqu $dst,$tmp\t! 
mul reduction4F" %} 5148 ins_encode %{ 5149 __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); 5150 __ mulss($tmp$$XMMRegister, $src2$$XMMRegister); 5151 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01); 5152 __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister); 5153 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02); 5154 __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister); 5155 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03); 5156 __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister); 5157 __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister); 5158 %} 5159 ins_pipe( pipe_slow ); 5160 %} 5161 5162 instruct rvmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{ 5163 predicate(UseAVX > 0); 5164 match(Set dst (MulReductionVF src1 src2)); 5165 effect(TEMP tmp, TEMP tmp2); 5166 format %{ "vmulss $tmp2,$src1,$src2\n\t" 5167 "pshufd $tmp,$src2,0x01\n\t" 5168 "vmulss $tmp2,$tmp2,$tmp\n\t" 5169 "pshufd $tmp,$src2,0x02\n\t" 5170 "vmulss $tmp2,$tmp2,$tmp\n\t" 5171 "pshufd $tmp,$src2,0x03\n\t" 5172 "vmulss $dst,$tmp2,$tmp\t! mul reduction4F" %} 5173 ins_encode %{ 5174 __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 5175 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5176 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5177 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5178 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5179 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5180 __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5181 %} 5182 ins_pipe( pipe_slow ); 5183 %} 5184 5185 instruct rvmul8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{ 5186 predicate(UseAVX > 0); 5187 match(Set dst (MulReductionVF src1 src2)); 5188 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5189 format %{ "vmulss $tmp2,$src1,$src2\n\t" 5190 "pshufd $tmp,$src2,0x01\n\t" 5191 "vmulss $tmp2,$tmp2,$tmp\n\t" 5192 "pshufd $tmp,$src2,0x02\n\t" 5193 "vmulss $tmp2,$tmp2,$tmp\n\t" 5194 "pshufd $tmp,$src2,0x03\n\t" 5195 "vmulss $tmp2,$tmp2,$tmp\n\t" 5196 "vextractf128 $tmp3,$src2\n\t" 5197 "vmulss $tmp2,$tmp2,$tmp3\n\t" 5198 "pshufd $tmp,$tmp3,0x01\n\t" 5199 "vmulss $tmp2,$tmp2,$tmp\n\t" 5200 "pshufd $tmp,$tmp3,0x02\n\t" 5201 "vmulss $tmp2,$tmp2,$tmp\n\t" 5202 "pshufd $tmp,$tmp3,0x03\n\t" 5203 "vmulss $dst,$tmp2,$tmp\t! 
mul reduction8F" %} 5204 ins_encode %{ 5205 __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 5206 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5207 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5208 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5209 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5210 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5211 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5212 __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister); 5213 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 5214 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); 5215 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5216 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); 5217 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5218 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); 5219 __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5220 %} 5221 ins_pipe( pipe_slow ); 5222 %} 5223 5224 instruct rvmul16F_reduction_reg(regF dst, regF src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 5225 predicate(UseAVX > 2); 5226 match(Set dst (MulReductionVF src1 src2)); 5227 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5228 format %{ "vmulss $tmp2,$src1,$src2\n\t" 5229 "pshufd $tmp,$src2,0x01\n\t" 5230 "vmulss $tmp2,$tmp2,$tmp\n\t" 5231 "pshufd $tmp,$src2,0x02\n\t" 5232 "vmulss $tmp2,$tmp2,$tmp\n\t" 5233 "pshufd $tmp,$src2,0x03\n\t" 5234 "vmulss $tmp2,$tmp2,$tmp\n\t" 5235 "vextractf32x4 $tmp3,$src2, 0x1\n\t" 5236 "vmulss $tmp2,$tmp2,$tmp3\n\t" 5237 "pshufd $tmp,$tmp3,0x01\n\t" 5238 "vmulss $tmp2,$tmp2,$tmp\n\t" 5239 "pshufd $tmp,$tmp3,0x02\n\t" 5240 "vmulss $tmp2,$tmp2,$tmp\n\t" 5241 "pshufd $tmp,$tmp3,0x03\n\t" 5242 "vmulss $tmp2,$tmp2,$tmp\n\t" 5243 "vextractf32x4 $tmp3,$src2, 0x2\n\t" 5244 "vmulss $tmp2,$tmp2,$tmp3\n\t" 5245 "pshufd $tmp,$tmp3,0x01\n\t" 5246 "vmulss $tmp2,$tmp2,$tmp\n\t" 5247 "pshufd $tmp,$tmp3,0x02\n\t" 5248 "vmulss $tmp2,$tmp2,$tmp\n\t" 5249 "pshufd $tmp,$tmp3,0x03\n\t" 5250 "vmulss $tmp2,$tmp2,$tmp\n\t" 5251 "vextractf32x4 $tmp3,$src2, 0x3\n\t" 5252 "vmulss $tmp2,$tmp2,$tmp3\n\t" 5253 "pshufd $tmp,$tmp3,0x01\n\t" 5254 "vmulss $tmp2,$tmp2,$tmp\n\t" 5255 "pshufd $tmp,$tmp3,0x02\n\t" 5256 "vmulss $tmp2,$tmp2,$tmp\n\t" 5257 "pshufd $tmp,$tmp3,0x03\n\t" 5258 "vmulss $dst,$tmp2,$tmp\t! 
mul reduction16F" %} 5259 ins_encode %{ 5260 __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 5261 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5262 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5263 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5264 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5265 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5266 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5267 __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1); 5268 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 5269 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); 5270 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5271 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); 5272 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5273 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); 5274 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5275 __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2); 5276 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 5277 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); 5278 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5279 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); 5280 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5281 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); 5282 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5283 __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3); 5284 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 5285 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); 5286 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5287 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); 5288 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5289 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); 5290 __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5291 %} 5292 ins_pipe( pipe_slow ); 5293 %} 5294 5295 instruct rsmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{ 5296 predicate(UseSSE >= 1 && UseAVX == 0); 5297 match(Set dst (MulReductionVD src1 src2)); 5298 effect(TEMP tmp, TEMP dst); 5299 format %{ "movdqu $tmp,$src1\n\t" 5300 "mulsd $tmp,$src2\n\t" 5301 "pshufd $dst,$src2,0xE\n\t" 5302 "mulsd $dst,$tmp\t! mul reduction2D" %} 5303 ins_encode %{ 5304 __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); 5305 __ mulsd($tmp$$XMMRegister, $src2$$XMMRegister); 5306 __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE); 5307 __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister); 5308 %} 5309 ins_pipe( pipe_slow ); 5310 %} 5311 5312 instruct rvmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{ 5313 predicate(UseAVX > 0); 5314 match(Set dst (MulReductionVD src1 src2)); 5315 effect(TEMP tmp, TEMP tmp2); 5316 format %{ "vmulsd $tmp2,$src1,$src2\n\t" 5317 "pshufd $tmp,$src2,0xE\n\t" 5318 "vmulsd $dst,$tmp2,$tmp\t! 
mul reduction2D" %} 5319 ins_encode %{ 5320 __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 5321 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5322 __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5323 %} 5324 ins_pipe( pipe_slow ); 5325 %} 5326 5327 instruct rvmul4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{ 5328 predicate(UseAVX > 0); 5329 match(Set dst (MulReductionVD src1 src2)); 5330 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5331 format %{ "vmulsd $tmp2,$src1,$src2\n\t" 5332 "pshufd $tmp,$src2,0xE\n\t" 5333 "vmulsd $tmp2,$tmp2,$tmp\n\t" 5334 "vextractf128 $tmp3,$src2\n\t" 5335 "vmulsd $tmp2,$tmp2,$tmp3\n\t" 5336 "pshufd $tmp,$tmp3,0xE\n\t" 5337 "vmulsd $dst,$tmp2,$tmp\t! mul reduction4D" %} 5338 ins_encode %{ 5339 __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 5340 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5341 __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5342 __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister); 5343 __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 5344 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); 5345 __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5346 %} 5347 ins_pipe( pipe_slow ); 5348 %} 5349 5350 instruct rvmul8D_reduction_reg(regD dst, regD src1, vecZ src2, regD tmp, regD tmp2, regD tmp3) %{ 5351 predicate(UseAVX > 2); 5352 match(Set dst (MulReductionVD src1 src2)); 5353 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5354 format %{ "vmulsd $tmp2,$src1,$src2\n\t" 5355 "pshufd $tmp,$src2,0xE\n\t" 5356 "vmulsd $tmp2,$tmp2,$tmp\n\t" 5357 "vextractf64x2 $tmp3,$src2, 0x1\n\t" 5358 "vmulsd $tmp2,$tmp2,$tmp3\n\t" 5359 "pshufd $tmp,$src2,0xE\n\t" 5360 "vmulsd $tmp2,$tmp2,$tmp\n\t" 5361 "vextractf64x2 $tmp3,$src2, 0x2\n\t" 5362 "vmulsd $tmp2,$tmp2,$tmp3\n\t" 5363 "pshufd $tmp,$tmp3,0xE\n\t" 5364 "vmulsd $tmp2,$tmp2,$tmp\n\t" 5365 "vextractf64x2 $tmp3,$src2, 0x3\n\t" 5366 "vmulsd $tmp2,$tmp2,$tmp3\n\t" 5367 "pshufd $tmp,$tmp3,0xE\n\t" 5368 "vmulsd $dst,$tmp2,$tmp\t! 
mul reduction8D" %} 5369 ins_encode %{ 5370 __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 5371 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5372 __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5373 __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1); 5374 __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 5375 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); 5376 __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5377 __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2); 5378 __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 5379 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); 5380 __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5381 __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3); 5382 __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 5383 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); 5384 __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5385 %} 5386 ins_pipe( pipe_slow ); 5387 %} 5388 5389 // ====================VECTOR ARITHMETIC======================================= 5390 5391 // --------------------------------- ADD -------------------------------------- 5392 5393 // Bytes vector add 5394 instruct vadd4B(vecS dst, vecS src) %{ 5395 predicate(n->as_Vector()->length() == 4); 5396 match(Set dst (AddVB dst src)); 5397 format %{ "paddb $dst,$src\t! add packed4B" %} 5398 ins_encode %{ 5399 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5400 %} 5401 ins_pipe( pipe_slow ); 5402 %} 5403 5404 instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{ 5405 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5406 match(Set dst (AddVB src1 src2)); 5407 format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %} 5408 ins_encode %{ 5409 int vector_len = 0; 5410 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5411 %} 5412 ins_pipe( pipe_slow ); 5413 %} 5414 5415 instruct vadd4B_mem(vecS dst, vecS src, memory mem) %{ 5416 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5417 match(Set dst (AddVB src (LoadVector mem))); 5418 format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %} 5419 ins_encode %{ 5420 int vector_len = 0; 5421 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5422 %} 5423 ins_pipe( pipe_slow ); 5424 %} 5425 5426 instruct vadd8B(vecD dst, vecD src) %{ 5427 predicate(n->as_Vector()->length() == 8); 5428 match(Set dst (AddVB dst src)); 5429 format %{ "paddb $dst,$src\t! add packed8B" %} 5430 ins_encode %{ 5431 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5432 %} 5433 ins_pipe( pipe_slow ); 5434 %} 5435 5436 instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{ 5437 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 5438 match(Set dst (AddVB src1 src2)); 5439 format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %} 5440 ins_encode %{ 5441 int vector_len = 0; 5442 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5443 %} 5444 ins_pipe( pipe_slow ); 5445 %} 5446 5447 instruct vadd8B_mem(vecD dst, vecD src, memory mem) %{ 5448 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 5449 match(Set dst (AddVB src (LoadVector mem))); 5450 format %{ "vpaddb $dst,$src,$mem\t! 
add packed8B" %} 5451 ins_encode %{ 5452 int vector_len = 0; 5453 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5454 %} 5455 ins_pipe( pipe_slow ); 5456 %} 5457 5458 instruct vadd16B(vecX dst, vecX src) %{ 5459 predicate(n->as_Vector()->length() == 16); 5460 match(Set dst (AddVB dst src)); 5461 format %{ "paddb $dst,$src\t! add packed16B" %} 5462 ins_encode %{ 5463 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5464 %} 5465 ins_pipe( pipe_slow ); 5466 %} 5467 5468 instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{ 5469 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 5470 match(Set dst (AddVB src1 src2)); 5471 format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %} 5472 ins_encode %{ 5473 int vector_len = 0; 5474 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5475 %} 5476 ins_pipe( pipe_slow ); 5477 %} 5478 5479 instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{ 5480 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 5481 match(Set dst (AddVB src (LoadVector mem))); 5482 format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %} 5483 ins_encode %{ 5484 int vector_len = 0; 5485 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5486 %} 5487 ins_pipe( pipe_slow ); 5488 %} 5489 5490 instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{ 5491 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 5492 match(Set dst (AddVB src1 src2)); 5493 format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %} 5494 ins_encode %{ 5495 int vector_len = 1; 5496 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5497 %} 5498 ins_pipe( pipe_slow ); 5499 %} 5500 5501 instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{ 5502 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 5503 match(Set dst (AddVB src (LoadVector mem))); 5504 format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %} 5505 ins_encode %{ 5506 int vector_len = 1; 5507 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5508 %} 5509 ins_pipe( pipe_slow ); 5510 %} 5511 5512 instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ 5513 predicate(UseAVX > 2 && n->as_Vector()->length() == 64); 5514 match(Set dst (AddVB src1 src2)); 5515 format %{ "vpaddb $dst,$src1,$src2\t! add packed64B" %} 5516 ins_encode %{ 5517 int vector_len = 2; 5518 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5519 %} 5520 ins_pipe( pipe_slow ); 5521 %} 5522 5523 instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{ 5524 predicate(UseAVX > 2 && n->as_Vector()->length() == 64); 5525 match(Set dst (AddVB src (LoadVector mem))); 5526 format %{ "vpaddb $dst,$src,$mem\t! add packed64B" %} 5527 ins_encode %{ 5528 int vector_len = 2; 5529 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5530 %} 5531 ins_pipe( pipe_slow ); 5532 %} 5533 5534 // Shorts/Chars vector add 5535 instruct vadd2S(vecS dst, vecS src) %{ 5536 predicate(n->as_Vector()->length() == 2); 5537 match(Set dst (AddVS dst src)); 5538 format %{ "paddw $dst,$src\t! add packed2S" %} 5539 ins_encode %{ 5540 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5541 %} 5542 ins_pipe( pipe_slow ); 5543 %} 5544 5545 instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{ 5546 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5547 match(Set dst (AddVS src1 src2)); 5548 format %{ "vpaddw $dst,$src1,$src2\t! 
add packed2S" %} 5549 ins_encode %{ 5550 int vector_len = 0; 5551 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5552 %} 5553 ins_pipe( pipe_slow ); 5554 %} 5555 5556 instruct vadd2S_mem(vecS dst, vecS src, memory mem) %{ 5557 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5558 match(Set dst (AddVS src (LoadVector mem))); 5559 format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %} 5560 ins_encode %{ 5561 int vector_len = 0; 5562 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5563 %} 5564 ins_pipe( pipe_slow ); 5565 %} 5566 5567 instruct vadd4S(vecD dst, vecD src) %{ 5568 predicate(n->as_Vector()->length() == 4); 5569 match(Set dst (AddVS dst src)); 5570 format %{ "paddw $dst,$src\t! add packed4S" %} 5571 ins_encode %{ 5572 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5573 %} 5574 ins_pipe( pipe_slow ); 5575 %} 5576 5577 instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{ 5578 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5579 match(Set dst (AddVS src1 src2)); 5580 format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %} 5581 ins_encode %{ 5582 int vector_len = 0; 5583 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5584 %} 5585 ins_pipe( pipe_slow ); 5586 %} 5587 5588 instruct vadd4S_mem(vecD dst, vecD src, memory mem) %{ 5589 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5590 match(Set dst (AddVS src (LoadVector mem))); 5591 format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %} 5592 ins_encode %{ 5593 int vector_len = 0; 5594 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5595 %} 5596 ins_pipe( pipe_slow ); 5597 %} 5598 5599 instruct vadd8S(vecX dst, vecX src) %{ 5600 predicate(n->as_Vector()->length() == 8); 5601 match(Set dst (AddVS dst src)); 5602 format %{ "paddw $dst,$src\t! add packed8S" %} 5603 ins_encode %{ 5604 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5605 %} 5606 ins_pipe( pipe_slow ); 5607 %} 5608 5609 instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{ 5610 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 5611 match(Set dst (AddVS src1 src2)); 5612 format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %} 5613 ins_encode %{ 5614 int vector_len = 0; 5615 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5616 %} 5617 ins_pipe( pipe_slow ); 5618 %} 5619 5620 instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{ 5621 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 5622 match(Set dst (AddVS src (LoadVector mem))); 5623 format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %} 5624 ins_encode %{ 5625 int vector_len = 0; 5626 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5627 %} 5628 ins_pipe( pipe_slow ); 5629 %} 5630 5631 instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{ 5632 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 5633 match(Set dst (AddVS src1 src2)); 5634 format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %} 5635 ins_encode %{ 5636 int vector_len = 1; 5637 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5638 %} 5639 ins_pipe( pipe_slow ); 5640 %} 5641 5642 instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{ 5643 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 5644 match(Set dst (AddVS src (LoadVector mem))); 5645 format %{ "vpaddw $dst,$src,$mem\t! 
add packed16S" %} 5646 ins_encode %{ 5647 int vector_len = 1; 5648 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5649 %} 5650 ins_pipe( pipe_slow ); 5651 %} 5652 5653 instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ 5654 predicate(UseAVX > 2 && n->as_Vector()->length() == 32); 5655 match(Set dst (AddVS src1 src2)); 5656 format %{ "vpaddw $dst,$src1,$src2\t! add packed32S" %} 5657 ins_encode %{ 5658 int vector_len = 2; 5659 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5660 %} 5661 ins_pipe( pipe_slow ); 5662 %} 5663 5664 instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{ 5665 predicate(UseAVX > 2 && n->as_Vector()->length() == 32); 5666 match(Set dst (AddVS src (LoadVector mem))); 5667 format %{ "vpaddw $dst,$src,$mem\t! add packed32S" %} 5668 ins_encode %{ 5669 int vector_len = 2; 5670 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5671 %} 5672 ins_pipe( pipe_slow ); 5673 %} 5674 5675 // Integers vector add 5676 instruct vadd2I(vecD dst, vecD src) %{ 5677 predicate(n->as_Vector()->length() == 2); 5678 match(Set dst (AddVI dst src)); 5679 format %{ "paddd $dst,$src\t! add packed2I" %} 5680 ins_encode %{ 5681 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 5682 %} 5683 ins_pipe( pipe_slow ); 5684 %} 5685 5686 instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{ 5687 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5688 match(Set dst (AddVI src1 src2)); 5689 format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %} 5690 ins_encode %{ 5691 int vector_len = 0; 5692 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5693 %} 5694 ins_pipe( pipe_slow ); 5695 %} 5696 5697 instruct vadd2I_mem(vecD dst, vecD src, memory mem) %{ 5698 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5699 match(Set dst (AddVI src (LoadVector mem))); 5700 format %{ "vpaddd $dst,$src,$mem\t! add packed2I" %} 5701 ins_encode %{ 5702 int vector_len = 0; 5703 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5704 %} 5705 ins_pipe( pipe_slow ); 5706 %} 5707 5708 instruct vadd4I(vecX dst, vecX src) %{ 5709 predicate(n->as_Vector()->length() == 4); 5710 match(Set dst (AddVI dst src)); 5711 format %{ "paddd $dst,$src\t! add packed4I" %} 5712 ins_encode %{ 5713 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 5714 %} 5715 ins_pipe( pipe_slow ); 5716 %} 5717 5718 instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{ 5719 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5720 match(Set dst (AddVI src1 src2)); 5721 format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %} 5722 ins_encode %{ 5723 int vector_len = 0; 5724 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5725 %} 5726 ins_pipe( pipe_slow ); 5727 %} 5728 5729 instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{ 5730 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5731 match(Set dst (AddVI src (LoadVector mem))); 5732 format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %} 5733 ins_encode %{ 5734 int vector_len = 0; 5735 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5736 %} 5737 ins_pipe( pipe_slow ); 5738 %} 5739 5740 instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{ 5741 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 5742 match(Set dst (AddVI src1 src2)); 5743 format %{ "vpaddd $dst,$src1,$src2\t! 
add packed8I" %} 5744 ins_encode %{ 5745 int vector_len = 1; 5746 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5747 %} 5748 ins_pipe( pipe_slow ); 5749 %} 5750 5751 instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{ 5752 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 5753 match(Set dst (AddVI src (LoadVector mem))); 5754 format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %} 5755 ins_encode %{ 5756 int vector_len = 1; 5757 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5758 %} 5759 ins_pipe( pipe_slow ); 5760 %} 5761 5762 instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ 5763 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 5764 match(Set dst (AddVI src1 src2)); 5765 format %{ "vpaddd $dst,$src1,$src2\t! add packed16I" %} 5766 ins_encode %{ 5767 int vector_len = 2; 5768 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5769 %} 5770 ins_pipe( pipe_slow ); 5771 %} 5772 5773 instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{ 5774 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 5775 match(Set dst (AddVI src (LoadVector mem))); 5776 format %{ "vpaddd $dst,$src,$mem\t! add packed16I" %} 5777 ins_encode %{ 5778 int vector_len = 2; 5779 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5780 %} 5781 ins_pipe( pipe_slow ); 5782 %} 5783 5784 // Longs vector add 5785 instruct vadd2L(vecX dst, vecX src) %{ 5786 predicate(n->as_Vector()->length() == 2); 5787 match(Set dst (AddVL dst src)); 5788 format %{ "paddq $dst,$src\t! add packed2L" %} 5789 ins_encode %{ 5790 __ paddq($dst$$XMMRegister, $src$$XMMRegister); 5791 %} 5792 ins_pipe( pipe_slow ); 5793 %} 5794 5795 instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{ 5796 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5797 match(Set dst (AddVL src1 src2)); 5798 format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %} 5799 ins_encode %{ 5800 int vector_len = 0; 5801 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5802 %} 5803 ins_pipe( pipe_slow ); 5804 %} 5805 5806 instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{ 5807 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5808 match(Set dst (AddVL src (LoadVector mem))); 5809 format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %} 5810 ins_encode %{ 5811 int vector_len = 0; 5812 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5813 %} 5814 ins_pipe( pipe_slow ); 5815 %} 5816 5817 instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{ 5818 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 5819 match(Set dst (AddVL src1 src2)); 5820 format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %} 5821 ins_encode %{ 5822 int vector_len = 1; 5823 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5824 %} 5825 ins_pipe( pipe_slow ); 5826 %} 5827 5828 instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{ 5829 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 5830 match(Set dst (AddVL src (LoadVector mem))); 5831 format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %} 5832 ins_encode %{ 5833 int vector_len = 1; 5834 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5835 %} 5836 ins_pipe( pipe_slow ); 5837 %} 5838 5839 instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ 5840 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 5841 match(Set dst (AddVL src1 src2)); 5842 format %{ "vpaddq $dst,$src1,$src2\t! 
add packed8L" %} 5843 ins_encode %{ 5844 int vector_len = 2; 5845 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5846 %} 5847 ins_pipe( pipe_slow ); 5848 %} 5849 5850 instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{ 5851 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 5852 match(Set dst (AddVL src (LoadVector mem))); 5853 format %{ "vpaddq $dst,$src,$mem\t! add packed8L" %} 5854 ins_encode %{ 5855 int vector_len = 2; 5856 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5857 %} 5858 ins_pipe( pipe_slow ); 5859 %} 5860 5861 // Floats vector add 5862 instruct vadd2F(vecD dst, vecD src) %{ 5863 predicate(n->as_Vector()->length() == 2); 5864 match(Set dst (AddVF dst src)); 5865 format %{ "addps $dst,$src\t! add packed2F" %} 5866 ins_encode %{ 5867 __ addps($dst$$XMMRegister, $src$$XMMRegister); 5868 %} 5869 ins_pipe( pipe_slow ); 5870 %} 5871 5872 instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{ 5873 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5874 match(Set dst (AddVF src1 src2)); 5875 format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %} 5876 ins_encode %{ 5877 int vector_len = 0; 5878 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5879 %} 5880 ins_pipe( pipe_slow ); 5881 %} 5882 5883 instruct vadd2F_mem(vecD dst, vecD src, memory mem) %{ 5884 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5885 match(Set dst (AddVF src (LoadVector mem))); 5886 format %{ "vaddps $dst,$src,$mem\t! add packed2F" %} 5887 ins_encode %{ 5888 int vector_len = 0; 5889 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5890 %} 5891 ins_pipe( pipe_slow ); 5892 %} 5893 5894 instruct vadd4F(vecX dst, vecX src) %{ 5895 predicate(n->as_Vector()->length() == 4); 5896 match(Set dst (AddVF dst src)); 5897 format %{ "addps $dst,$src\t! add packed4F" %} 5898 ins_encode %{ 5899 __ addps($dst$$XMMRegister, $src$$XMMRegister); 5900 %} 5901 ins_pipe( pipe_slow ); 5902 %} 5903 5904 instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{ 5905 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5906 match(Set dst (AddVF src1 src2)); 5907 format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %} 5908 ins_encode %{ 5909 int vector_len = 0; 5910 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5911 %} 5912 ins_pipe( pipe_slow ); 5913 %} 5914 5915 instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{ 5916 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5917 match(Set dst (AddVF src (LoadVector mem))); 5918 format %{ "vaddps $dst,$src,$mem\t! add packed4F" %} 5919 ins_encode %{ 5920 int vector_len = 0; 5921 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5922 %} 5923 ins_pipe( pipe_slow ); 5924 %} 5925 5926 instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{ 5927 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 5928 match(Set dst (AddVF src1 src2)); 5929 format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %} 5930 ins_encode %{ 5931 int vector_len = 1; 5932 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5933 %} 5934 ins_pipe( pipe_slow ); 5935 %} 5936 5937 instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{ 5938 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 5939 match(Set dst (AddVF src (LoadVector mem))); 5940 format %{ "vaddps $dst,$src,$mem\t! 
add packed8F" %} 5941 ins_encode %{ 5942 int vector_len = 1; 5943 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5944 %} 5945 ins_pipe( pipe_slow ); 5946 %} 5947 5948 instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ 5949 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 5950 match(Set dst (AddVF src1 src2)); 5951 format %{ "vaddps $dst,$src1,$src2\t! add packed16F" %} 5952 ins_encode %{ 5953 int vector_len = 2; 5954 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5955 %} 5956 ins_pipe( pipe_slow ); 5957 %} 5958 5959 instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{ 5960 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 5961 match(Set dst (AddVF src (LoadVector mem))); 5962 format %{ "vaddps $dst,$src,$mem\t! add packed16F" %} 5963 ins_encode %{ 5964 int vector_len = 2; 5965 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5966 %} 5967 ins_pipe( pipe_slow ); 5968 %} 5969 5970 // Doubles vector add 5971 instruct vadd2D(vecX dst, vecX src) %{ 5972 predicate(n->as_Vector()->length() == 2); 5973 match(Set dst (AddVD dst src)); 5974 format %{ "addpd $dst,$src\t! add packed2D" %} 5975 ins_encode %{ 5976 __ addpd($dst$$XMMRegister, $src$$XMMRegister); 5977 %} 5978 ins_pipe( pipe_slow ); 5979 %} 5980 5981 instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{ 5982 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5983 match(Set dst (AddVD src1 src2)); 5984 format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %} 5985 ins_encode %{ 5986 int vector_len = 0; 5987 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5988 %} 5989 ins_pipe( pipe_slow ); 5990 %} 5991 5992 instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{ 5993 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5994 match(Set dst (AddVD src (LoadVector mem))); 5995 format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %} 5996 ins_encode %{ 5997 int vector_len = 0; 5998 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5999 %} 6000 ins_pipe( pipe_slow ); 6001 %} 6002 6003 instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{ 6004 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6005 match(Set dst (AddVD src1 src2)); 6006 format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %} 6007 ins_encode %{ 6008 int vector_len = 1; 6009 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6010 %} 6011 ins_pipe( pipe_slow ); 6012 %} 6013 6014 instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{ 6015 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6016 match(Set dst (AddVD src (LoadVector mem))); 6017 format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %} 6018 ins_encode %{ 6019 int vector_len = 1; 6020 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6021 %} 6022 ins_pipe( pipe_slow ); 6023 %} 6024 6025 instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6026 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 6027 match(Set dst (AddVD src1 src2)); 6028 format %{ "vaddpd $dst,$src1,$src2\t! add packed8D" %} 6029 ins_encode %{ 6030 int vector_len = 2; 6031 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6032 %} 6033 ins_pipe( pipe_slow ); 6034 %} 6035 6036 instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{ 6037 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 6038 match(Set dst (AddVD src (LoadVector mem))); 6039 format %{ "vaddpd $dst,$src,$mem\t! 
add packed8D" %} 6040 ins_encode %{ 6041 int vector_len = 2; 6042 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6043 %} 6044 ins_pipe( pipe_slow ); 6045 %} 6046 6047 // --------------------------------- SUB -------------------------------------- 6048 6049 // Bytes vector sub 6050 instruct vsub4B(vecS dst, vecS src) %{ 6051 predicate(n->as_Vector()->length() == 4); 6052 match(Set dst (SubVB dst src)); 6053 format %{ "psubb $dst,$src\t! sub packed4B" %} 6054 ins_encode %{ 6055 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 6056 %} 6057 ins_pipe( pipe_slow ); 6058 %} 6059 6060 instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{ 6061 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6062 match(Set dst (SubVB src1 src2)); 6063 format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %} 6064 ins_encode %{ 6065 int vector_len = 0; 6066 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6067 %} 6068 ins_pipe( pipe_slow ); 6069 %} 6070 6071 instruct vsub4B_mem(vecS dst, vecS src, memory mem) %{ 6072 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6073 match(Set dst (SubVB src (LoadVector mem))); 6074 format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %} 6075 ins_encode %{ 6076 int vector_len = 0; 6077 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6078 %} 6079 ins_pipe( pipe_slow ); 6080 %} 6081 6082 instruct vsub8B(vecD dst, vecD src) %{ 6083 predicate(n->as_Vector()->length() == 8); 6084 match(Set dst (SubVB dst src)); 6085 format %{ "psubb $dst,$src\t! sub packed8B" %} 6086 ins_encode %{ 6087 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 6088 %} 6089 ins_pipe( pipe_slow ); 6090 %} 6091 6092 instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{ 6093 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6094 match(Set dst (SubVB src1 src2)); 6095 format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %} 6096 ins_encode %{ 6097 int vector_len = 0; 6098 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6099 %} 6100 ins_pipe( pipe_slow ); 6101 %} 6102 6103 instruct vsub8B_mem(vecD dst, vecD src, memory mem) %{ 6104 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6105 match(Set dst (SubVB src (LoadVector mem))); 6106 format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %} 6107 ins_encode %{ 6108 int vector_len = 0; 6109 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6110 %} 6111 ins_pipe( pipe_slow ); 6112 %} 6113 6114 instruct vsub16B(vecX dst, vecX src) %{ 6115 predicate(n->as_Vector()->length() == 16); 6116 match(Set dst (SubVB dst src)); 6117 format %{ "psubb $dst,$src\t! sub packed16B" %} 6118 ins_encode %{ 6119 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 6120 %} 6121 ins_pipe( pipe_slow ); 6122 %} 6123 6124 instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{ 6125 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 6126 match(Set dst (SubVB src1 src2)); 6127 format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %} 6128 ins_encode %{ 6129 int vector_len = 0; 6130 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6131 %} 6132 ins_pipe( pipe_slow ); 6133 %} 6134 6135 instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{ 6136 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 6137 match(Set dst (SubVB src (LoadVector mem))); 6138 format %{ "vpsubb $dst,$src,$mem\t! 
sub packed16B" %} 6139 ins_encode %{ 6140 int vector_len = 0; 6141 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6142 %} 6143 ins_pipe( pipe_slow ); 6144 %} 6145 6146 instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{ 6147 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 6148 match(Set dst (SubVB src1 src2)); 6149 format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %} 6150 ins_encode %{ 6151 int vector_len = 1; 6152 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6153 %} 6154 ins_pipe( pipe_slow ); 6155 %} 6156 6157 instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{ 6158 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 6159 match(Set dst (SubVB src (LoadVector mem))); 6160 format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %} 6161 ins_encode %{ 6162 int vector_len = 1; 6163 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6164 %} 6165 ins_pipe( pipe_slow ); 6166 %} 6167 6168 instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6169 predicate(UseAVX > 2 && n->as_Vector()->length() == 64); 6170 match(Set dst (SubVB src1 src2)); 6171 format %{ "vpsubb $dst,$src1,$src2\t! sub packed64B" %} 6172 ins_encode %{ 6173 int vector_len = 2; 6174 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6175 %} 6176 ins_pipe( pipe_slow ); 6177 %} 6178 6179 instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{ 6180 predicate(UseAVX > 2 && n->as_Vector()->length() == 64); 6181 match(Set dst (SubVB src (LoadVector mem))); 6182 format %{ "vpsubb $dst,$src,$mem\t! sub packed64B" %} 6183 ins_encode %{ 6184 int vector_len = 2; 6185 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6186 %} 6187 ins_pipe( pipe_slow ); 6188 %} 6189 6190 // Shorts/Chars vector sub 6191 instruct vsub2S(vecS dst, vecS src) %{ 6192 predicate(n->as_Vector()->length() == 2); 6193 match(Set dst (SubVS dst src)); 6194 format %{ "psubw $dst,$src\t! sub packed2S" %} 6195 ins_encode %{ 6196 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 6197 %} 6198 ins_pipe( pipe_slow ); 6199 %} 6200 6201 instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{ 6202 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6203 match(Set dst (SubVS src1 src2)); 6204 format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %} 6205 ins_encode %{ 6206 int vector_len = 0; 6207 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6208 %} 6209 ins_pipe( pipe_slow ); 6210 %} 6211 6212 instruct vsub2S_mem(vecS dst, vecS src, memory mem) %{ 6213 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6214 match(Set dst (SubVS src (LoadVector mem))); 6215 format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %} 6216 ins_encode %{ 6217 int vector_len = 0; 6218 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6219 %} 6220 ins_pipe( pipe_slow ); 6221 %} 6222 6223 instruct vsub4S(vecD dst, vecD src) %{ 6224 predicate(n->as_Vector()->length() == 4); 6225 match(Set dst (SubVS dst src)); 6226 format %{ "psubw $dst,$src\t! sub packed4S" %} 6227 ins_encode %{ 6228 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 6229 %} 6230 ins_pipe( pipe_slow ); 6231 %} 6232 6233 instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{ 6234 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6235 match(Set dst (SubVS src1 src2)); 6236 format %{ "vpsubw $dst,$src1,$src2\t! 
sub packed4S" %} 6237 ins_encode %{ 6238 int vector_len = 0; 6239 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6240 %} 6241 ins_pipe( pipe_slow ); 6242 %} 6243 6244 instruct vsub4S_mem(vecD dst, vecD src, memory mem) %{ 6245 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6246 match(Set dst (SubVS src (LoadVector mem))); 6247 format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %} 6248 ins_encode %{ 6249 int vector_len = 0; 6250 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6251 %} 6252 ins_pipe( pipe_slow ); 6253 %} 6254 6255 instruct vsub8S(vecX dst, vecX src) %{ 6256 predicate(n->as_Vector()->length() == 8); 6257 match(Set dst (SubVS dst src)); 6258 format %{ "psubw $dst,$src\t! sub packed8S" %} 6259 ins_encode %{ 6260 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 6261 %} 6262 ins_pipe( pipe_slow ); 6263 %} 6264 6265 instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{ 6266 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6267 match(Set dst (SubVS src1 src2)); 6268 format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} 6269 ins_encode %{ 6270 int vector_len = 0; 6271 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6272 %} 6273 ins_pipe( pipe_slow ); 6274 %} 6275 6276 instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{ 6277 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6278 match(Set dst (SubVS src (LoadVector mem))); 6279 format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %} 6280 ins_encode %{ 6281 int vector_len = 0; 6282 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6283 %} 6284 ins_pipe( pipe_slow ); 6285 %} 6286 6287 instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{ 6288 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 6289 match(Set dst (SubVS src1 src2)); 6290 format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} 6291 ins_encode %{ 6292 int vector_len = 1; 6293 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6294 %} 6295 ins_pipe( pipe_slow ); 6296 %} 6297 6298 instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{ 6299 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 6300 match(Set dst (SubVS src (LoadVector mem))); 6301 format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %} 6302 ins_encode %{ 6303 int vector_len = 1; 6304 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6305 %} 6306 ins_pipe( pipe_slow ); 6307 %} 6308 6309 instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6310 predicate(UseAVX > 2 && n->as_Vector()->length() == 32); 6311 match(Set dst (SubVS src1 src2)); 6312 format %{ "vpsubw $dst,$src1,$src2\t! sub packed32S" %} 6313 ins_encode %{ 6314 int vector_len = 2; 6315 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6316 %} 6317 ins_pipe( pipe_slow ); 6318 %} 6319 6320 instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{ 6321 predicate(UseAVX > 2 && n->as_Vector()->length() == 32); 6322 match(Set dst (SubVS src (LoadVector mem))); 6323 format %{ "vpsubw $dst,$src,$mem\t! sub packed32S" %} 6324 ins_encode %{ 6325 int vector_len = 2; 6326 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6327 %} 6328 ins_pipe( pipe_slow ); 6329 %} 6330 6331 // Integers vector sub 6332 instruct vsub2I(vecD dst, vecD src) %{ 6333 predicate(n->as_Vector()->length() == 2); 6334 match(Set dst (SubVI dst src)); 6335 format %{ "psubd $dst,$src\t! 
sub packed2I" %} 6336 ins_encode %{ 6337 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 6338 %} 6339 ins_pipe( pipe_slow ); 6340 %} 6341 6342 instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{ 6343 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6344 match(Set dst (SubVI src1 src2)); 6345 format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %} 6346 ins_encode %{ 6347 int vector_len = 0; 6348 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6349 %} 6350 ins_pipe( pipe_slow ); 6351 %} 6352 6353 instruct vsub2I_mem(vecD dst, vecD src, memory mem) %{ 6354 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6355 match(Set dst (SubVI src (LoadVector mem))); 6356 format %{ "vpsubd $dst,$src,$mem\t! sub packed2I" %} 6357 ins_encode %{ 6358 int vector_len = 0; 6359 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6360 %} 6361 ins_pipe( pipe_slow ); 6362 %} 6363 6364 instruct vsub4I(vecX dst, vecX src) %{ 6365 predicate(n->as_Vector()->length() == 4); 6366 match(Set dst (SubVI dst src)); 6367 format %{ "psubd $dst,$src\t! sub packed4I" %} 6368 ins_encode %{ 6369 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 6370 %} 6371 ins_pipe( pipe_slow ); 6372 %} 6373 6374 instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{ 6375 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6376 match(Set dst (SubVI src1 src2)); 6377 format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %} 6378 ins_encode %{ 6379 int vector_len = 0; 6380 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6381 %} 6382 ins_pipe( pipe_slow ); 6383 %} 6384 6385 instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{ 6386 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6387 match(Set dst (SubVI src (LoadVector mem))); 6388 format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %} 6389 ins_encode %{ 6390 int vector_len = 0; 6391 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6392 %} 6393 ins_pipe( pipe_slow ); 6394 %} 6395 6396 instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{ 6397 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 6398 match(Set dst (SubVI src1 src2)); 6399 format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %} 6400 ins_encode %{ 6401 int vector_len = 1; 6402 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6403 %} 6404 ins_pipe( pipe_slow ); 6405 %} 6406 6407 instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{ 6408 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 6409 match(Set dst (SubVI src (LoadVector mem))); 6410 format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %} 6411 ins_encode %{ 6412 int vector_len = 1; 6413 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6414 %} 6415 ins_pipe( pipe_slow ); 6416 %} 6417 6418 instruct vsub16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6419 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 6420 match(Set dst (SubVI src1 src2)); 6421 format %{ "vpsubd $dst,$src1,$src2\t! sub packed16I" %} 6422 ins_encode %{ 6423 int vector_len = 2; 6424 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6425 %} 6426 ins_pipe( pipe_slow ); 6427 %} 6428 6429 instruct vsub16I_mem(vecZ dst, vecZ src, memory mem) %{ 6430 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 6431 match(Set dst (SubVI src (LoadVector mem))); 6432 format %{ "vpsubd $dst,$src,$mem\t! 
sub packed16I" %} 6433 ins_encode %{ 6434 int vector_len = 2; 6435 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6436 %} 6437 ins_pipe( pipe_slow ); 6438 %} 6439 6440 // Longs vector sub 6441 instruct vsub2L(vecX dst, vecX src) %{ 6442 predicate(n->as_Vector()->length() == 2); 6443 match(Set dst (SubVL dst src)); 6444 format %{ "psubq $dst,$src\t! sub packed2L" %} 6445 ins_encode %{ 6446 __ psubq($dst$$XMMRegister, $src$$XMMRegister); 6447 %} 6448 ins_pipe( pipe_slow ); 6449 %} 6450 6451 instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{ 6452 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6453 match(Set dst (SubVL src1 src2)); 6454 format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %} 6455 ins_encode %{ 6456 int vector_len = 0; 6457 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6458 %} 6459 ins_pipe( pipe_slow ); 6460 %} 6461 6462 instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{ 6463 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6464 match(Set dst (SubVL src (LoadVector mem))); 6465 format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %} 6466 ins_encode %{ 6467 int vector_len = 0; 6468 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6469 %} 6470 ins_pipe( pipe_slow ); 6471 %} 6472 6473 instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{ 6474 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 6475 match(Set dst (SubVL src1 src2)); 6476 format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %} 6477 ins_encode %{ 6478 int vector_len = 1; 6479 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6480 %} 6481 ins_pipe( pipe_slow ); 6482 %} 6483 6484 instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{ 6485 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 6486 match(Set dst (SubVL src (LoadVector mem))); 6487 format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %} 6488 ins_encode %{ 6489 int vector_len = 1; 6490 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6491 %} 6492 ins_pipe( pipe_slow ); 6493 %} 6494 6495 instruct vsub8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6496 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 6497 match(Set dst (SubVL src1 src2)); 6498 format %{ "vpsubq $dst,$src1,$src2\t! sub packed8L" %} 6499 ins_encode %{ 6500 int vector_len = 2; 6501 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6502 %} 6503 ins_pipe( pipe_slow ); 6504 %} 6505 6506 instruct vsub8L_mem(vecZ dst, vecZ src, memory mem) %{ 6507 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 6508 match(Set dst (SubVL src (LoadVector mem))); 6509 format %{ "vpsubq $dst,$src,$mem\t! sub packed8L" %} 6510 ins_encode %{ 6511 int vector_len = 2; 6512 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6513 %} 6514 ins_pipe( pipe_slow ); 6515 %} 6516 6517 // Floats vector sub 6518 instruct vsub2F(vecD dst, vecD src) %{ 6519 predicate(n->as_Vector()->length() == 2); 6520 match(Set dst (SubVF dst src)); 6521 format %{ "subps $dst,$src\t! sub packed2F" %} 6522 ins_encode %{ 6523 __ subps($dst$$XMMRegister, $src$$XMMRegister); 6524 %} 6525 ins_pipe( pipe_slow ); 6526 %} 6527 6528 instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{ 6529 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6530 match(Set dst (SubVF src1 src2)); 6531 format %{ "vsubps $dst,$src1,$src2\t! 
sub packed2F" %} 6532 ins_encode %{ 6533 int vector_len = 0; 6534 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6535 %} 6536 ins_pipe( pipe_slow ); 6537 %} 6538 6539 instruct vsub2F_mem(vecD dst, vecD src, memory mem) %{ 6540 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6541 match(Set dst (SubVF src (LoadVector mem))); 6542 format %{ "vsubps $dst,$src,$mem\t! sub packed2F" %} 6543 ins_encode %{ 6544 int vector_len = 0; 6545 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6546 %} 6547 ins_pipe( pipe_slow ); 6548 %} 6549 6550 instruct vsub4F(vecX dst, vecX src) %{ 6551 predicate(n->as_Vector()->length() == 4); 6552 match(Set dst (SubVF dst src)); 6553 format %{ "subps $dst,$src\t! sub packed4F" %} 6554 ins_encode %{ 6555 __ subps($dst$$XMMRegister, $src$$XMMRegister); 6556 %} 6557 ins_pipe( pipe_slow ); 6558 %} 6559 6560 instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{ 6561 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6562 match(Set dst (SubVF src1 src2)); 6563 format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %} 6564 ins_encode %{ 6565 int vector_len = 0; 6566 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6567 %} 6568 ins_pipe( pipe_slow ); 6569 %} 6570 6571 instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{ 6572 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6573 match(Set dst (SubVF src (LoadVector mem))); 6574 format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %} 6575 ins_encode %{ 6576 int vector_len = 0; 6577 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6578 %} 6579 ins_pipe( pipe_slow ); 6580 %} 6581 6582 instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{ 6583 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6584 match(Set dst (SubVF src1 src2)); 6585 format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %} 6586 ins_encode %{ 6587 int vector_len = 1; 6588 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6589 %} 6590 ins_pipe( pipe_slow ); 6591 %} 6592 6593 instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{ 6594 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6595 match(Set dst (SubVF src (LoadVector mem))); 6596 format %{ "vsubps $dst,$src,$mem\t! sub packed8F" %} 6597 ins_encode %{ 6598 int vector_len = 1; 6599 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6600 %} 6601 ins_pipe( pipe_slow ); 6602 %} 6603 6604 instruct vsub16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6605 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 6606 match(Set dst (SubVF src1 src2)); 6607 format %{ "vsubps $dst,$src1,$src2\t! sub packed16F" %} 6608 ins_encode %{ 6609 int vector_len = 2; 6610 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6611 %} 6612 ins_pipe( pipe_slow ); 6613 %} 6614 6615 instruct vsub16F_mem(vecZ dst, vecZ src, memory mem) %{ 6616 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 6617 match(Set dst (SubVF src (LoadVector mem))); 6618 format %{ "vsubps $dst,$src,$mem\t! sub packed16F" %} 6619 ins_encode %{ 6620 int vector_len = 2; 6621 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6622 %} 6623 ins_pipe( pipe_slow ); 6624 %} 6625 6626 // Doubles vector sub 6627 instruct vsub2D(vecX dst, vecX src) %{ 6628 predicate(n->as_Vector()->length() == 2); 6629 match(Set dst (SubVD dst src)); 6630 format %{ "subpd $dst,$src\t! 
sub packed2D" %} 6631 ins_encode %{ 6632 __ subpd($dst$$XMMRegister, $src$$XMMRegister); 6633 %} 6634 ins_pipe( pipe_slow ); 6635 %} 6636 6637 instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{ 6638 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6639 match(Set dst (SubVD src1 src2)); 6640 format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %} 6641 ins_encode %{ 6642 int vector_len = 0; 6643 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6644 %} 6645 ins_pipe( pipe_slow ); 6646 %} 6647 6648 instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{ 6649 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6650 match(Set dst (SubVD src (LoadVector mem))); 6651 format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %} 6652 ins_encode %{ 6653 int vector_len = 0; 6654 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6655 %} 6656 ins_pipe( pipe_slow ); 6657 %} 6658 6659 instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{ 6660 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6661 match(Set dst (SubVD src1 src2)); 6662 format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %} 6663 ins_encode %{ 6664 int vector_len = 1; 6665 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6666 %} 6667 ins_pipe( pipe_slow ); 6668 %} 6669 6670 instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{ 6671 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6672 match(Set dst (SubVD src (LoadVector mem))); 6673 format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %} 6674 ins_encode %{ 6675 int vector_len = 1; 6676 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6677 %} 6678 ins_pipe( pipe_slow ); 6679 %} 6680 6681 instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6682 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 6683 match(Set dst (SubVD src1 src2)); 6684 format %{ "vsubpd $dst,$src1,$src2\t! sub packed8D" %} 6685 ins_encode %{ 6686 int vector_len = 2; 6687 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6688 %} 6689 ins_pipe( pipe_slow ); 6690 %} 6691 6692 instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{ 6693 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 6694 match(Set dst (SubVD src (LoadVector mem))); 6695 format %{ "vsubpd $dst,$src,$mem\t! sub packed8D" %} 6696 ins_encode %{ 6697 int vector_len = 2; 6698 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6699 %} 6700 ins_pipe( pipe_slow ); 6701 %} 6702 6703 // --------------------------------- MUL -------------------------------------- 6704 6705 // Shorts/Chars vector mul 6706 instruct vmul2S(vecS dst, vecS src) %{ 6707 predicate(n->as_Vector()->length() == 2); 6708 match(Set dst (MulVS dst src)); 6709 format %{ "pmullw $dst,$src\t! mul packed2S" %} 6710 ins_encode %{ 6711 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 6712 %} 6713 ins_pipe( pipe_slow ); 6714 %} 6715 6716 instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{ 6717 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6718 match(Set dst (MulVS src1 src2)); 6719 format %{ "vpmullw $dst,$src1,$src2\t! 
mul packed2S" %} 6720 ins_encode %{ 6721 int vector_len = 0; 6722 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6723 %} 6724 ins_pipe( pipe_slow ); 6725 %} 6726 6727 instruct vmul2S_mem(vecS dst, vecS src, memory mem) %{ 6728 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6729 match(Set dst (MulVS src (LoadVector mem))); 6730 format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} 6731 ins_encode %{ 6732 int vector_len = 0; 6733 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6734 %} 6735 ins_pipe( pipe_slow ); 6736 %} 6737 6738 instruct vmul4S(vecD dst, vecD src) %{ 6739 predicate(n->as_Vector()->length() == 4); 6740 match(Set dst (MulVS dst src)); 6741 format %{ "pmullw $dst,$src\t! mul packed4S" %} 6742 ins_encode %{ 6743 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 6744 %} 6745 ins_pipe( pipe_slow ); 6746 %} 6747 6748 instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{ 6749 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6750 match(Set dst (MulVS src1 src2)); 6751 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 6752 ins_encode %{ 6753 int vector_len = 0; 6754 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6755 %} 6756 ins_pipe( pipe_slow ); 6757 %} 6758 6759 instruct vmul4S_mem(vecD dst, vecD src, memory mem) %{ 6760 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6761 match(Set dst (MulVS src (LoadVector mem))); 6762 format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} 6763 ins_encode %{ 6764 int vector_len = 0; 6765 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6766 %} 6767 ins_pipe( pipe_slow ); 6768 %} 6769 6770 instruct vmul8S(vecX dst, vecX src) %{ 6771 predicate(n->as_Vector()->length() == 8); 6772 match(Set dst (MulVS dst src)); 6773 format %{ "pmullw $dst,$src\t! mul packed8S" %} 6774 ins_encode %{ 6775 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 6776 %} 6777 ins_pipe( pipe_slow ); 6778 %} 6779 6780 instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{ 6781 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6782 match(Set dst (MulVS src1 src2)); 6783 format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} 6784 ins_encode %{ 6785 int vector_len = 0; 6786 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6787 %} 6788 ins_pipe( pipe_slow ); 6789 %} 6790 6791 instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{ 6792 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6793 match(Set dst (MulVS src (LoadVector mem))); 6794 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} 6795 ins_encode %{ 6796 int vector_len = 0; 6797 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6798 %} 6799 ins_pipe( pipe_slow ); 6800 %} 6801 6802 instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{ 6803 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 6804 match(Set dst (MulVS src1 src2)); 6805 format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} 6806 ins_encode %{ 6807 int vector_len = 1; 6808 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6809 %} 6810 ins_pipe( pipe_slow ); 6811 %} 6812 6813 instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{ 6814 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 6815 match(Set dst (MulVS src (LoadVector mem))); 6816 format %{ "vpmullw $dst,$src,$mem\t! 
mul packed16S" %} 6817 ins_encode %{ 6818 int vector_len = 1; 6819 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6820 %} 6821 ins_pipe( pipe_slow ); 6822 %} 6823 6824 instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6825 predicate(UseAVX > 2 && n->as_Vector()->length() == 32); 6826 match(Set dst (MulVS src1 src2)); 6827 format %{ "vpmullw $dst,$src1,$src2\t! mul packed32S" %} 6828 ins_encode %{ 6829 int vector_len = 2; 6830 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6831 %} 6832 ins_pipe( pipe_slow ); 6833 %} 6834 6835 instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{ 6836 predicate(UseAVX > 2 && n->as_Vector()->length() == 32); 6837 match(Set dst (MulVS src (LoadVector mem))); 6838 format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %} 6839 ins_encode %{ 6840 int vector_len = 2; 6841 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6842 %} 6843 ins_pipe( pipe_slow ); 6844 %} 6845 6846 // Integers vector mul (sse4_1) 6847 instruct vmul2I(vecD dst, vecD src) %{ 6848 predicate(UseSSE > 3 && n->as_Vector()->length() == 2); 6849 match(Set dst (MulVI dst src)); 6850 format %{ "pmulld $dst,$src\t! mul packed2I" %} 6851 ins_encode %{ 6852 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 6853 %} 6854 ins_pipe( pipe_slow ); 6855 %} 6856 6857 instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{ 6858 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6859 match(Set dst (MulVI src1 src2)); 6860 format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %} 6861 ins_encode %{ 6862 int vector_len = 0; 6863 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6864 %} 6865 ins_pipe( pipe_slow ); 6866 %} 6867 6868 instruct vmul2I_mem(vecD dst, vecD src, memory mem) %{ 6869 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6870 match(Set dst (MulVI src (LoadVector mem))); 6871 format %{ "vpmulld $dst,$src,$mem\t! mul packed2I" %} 6872 ins_encode %{ 6873 int vector_len = 0; 6874 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6875 %} 6876 ins_pipe( pipe_slow ); 6877 %} 6878 6879 instruct vmul4I(vecX dst, vecX src) %{ 6880 predicate(UseSSE > 3 && n->as_Vector()->length() == 4); 6881 match(Set dst (MulVI dst src)); 6882 format %{ "pmulld $dst,$src\t! mul packed4I" %} 6883 ins_encode %{ 6884 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 6885 %} 6886 ins_pipe( pipe_slow ); 6887 %} 6888 6889 instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{ 6890 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6891 match(Set dst (MulVI src1 src2)); 6892 format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %} 6893 ins_encode %{ 6894 int vector_len = 0; 6895 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6896 %} 6897 ins_pipe( pipe_slow ); 6898 %} 6899 6900 instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{ 6901 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6902 match(Set dst (MulVI src (LoadVector mem))); 6903 format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %} 6904 ins_encode %{ 6905 int vector_len = 0; 6906 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6907 %} 6908 ins_pipe( pipe_slow ); 6909 %} 6910 6911 instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{ 6912 predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq()); 6913 match(Set dst (MulVL src1 src2)); 6914 format %{ "vpmullq $dst,$src1,$src2\t! 
mul packed2L" %} 6915 ins_encode %{ 6916 int vector_len = 0; 6917 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6918 %} 6919 ins_pipe( pipe_slow ); 6920 %} 6921 6922 instruct vmul2L_mem(vecX dst, vecX src, memory mem) %{ 6923 predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq()); 6924 match(Set dst (MulVL src (LoadVector mem))); 6925 format %{ "vpmullq $dst,$src,$mem\t! mul packed2L" %} 6926 ins_encode %{ 6927 int vector_len = 0; 6928 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6929 %} 6930 ins_pipe( pipe_slow ); 6931 %} 6932 6933 instruct vmul4L_reg(vecY dst, vecY src1, vecY src2) %{ 6934 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq()); 6935 match(Set dst (MulVL src1 src2)); 6936 format %{ "vpmullq $dst,$src1,$src2\t! mul packed4L" %} 6937 ins_encode %{ 6938 int vector_len = 1; 6939 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6940 %} 6941 ins_pipe( pipe_slow ); 6942 %} 6943 6944 instruct vmul4L_mem(vecY dst, vecY src, memory mem) %{ 6945 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq()); 6946 match(Set dst (MulVL src (LoadVector mem))); 6947 format %{ "vpmullq $dst,$src,$mem\t! mul packed4L" %} 6948 ins_encode %{ 6949 int vector_len = 1; 6950 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6951 %} 6952 ins_pipe( pipe_slow ); 6953 %} 6954 6955 instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6956 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq()); 6957 match(Set dst (MulVL src1 src2)); 6958 format %{ "vpmullq $dst,$src1,$src2\t! mul packed8L" %} 6959 ins_encode %{ 6960 int vector_len = 2; 6961 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6962 %} 6963 ins_pipe( pipe_slow ); 6964 %} 6965 6966 instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{ 6967 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq()); 6968 match(Set dst (MulVL src (LoadVector mem))); 6969 format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %} 6970 ins_encode %{ 6971 int vector_len = 2; 6972 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6973 %} 6974 ins_pipe( pipe_slow ); 6975 %} 6976 6977 instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{ 6978 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 6979 match(Set dst (MulVI src1 src2)); 6980 format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %} 6981 ins_encode %{ 6982 int vector_len = 1; 6983 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6984 %} 6985 ins_pipe( pipe_slow ); 6986 %} 6987 6988 instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{ 6989 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 6990 match(Set dst (MulVI src (LoadVector mem))); 6991 format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %} 6992 ins_encode %{ 6993 int vector_len = 1; 6994 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6995 %} 6996 ins_pipe( pipe_slow ); 6997 %} 6998 6999 instruct vmul16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7000 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7001 match(Set dst (MulVI src1 src2)); 7002 format %{ "vpmulld $dst,$src1,$src2\t! 
mul packed16I" %} 7003 ins_encode %{ 7004 int vector_len = 2; 7005 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7006 %} 7007 ins_pipe( pipe_slow ); 7008 %} 7009 7010 instruct vmul16I_mem(vecZ dst, vecZ src, memory mem) %{ 7011 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7012 match(Set dst (MulVI src (LoadVector mem))); 7013 format %{ "vpmulld $dst,$src,$mem\t! mul packed16I" %} 7014 ins_encode %{ 7015 int vector_len = 2; 7016 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7017 %} 7018 ins_pipe( pipe_slow ); 7019 %} 7020 7021 // Floats vector mul 7022 instruct vmul2F(vecD dst, vecD src) %{ 7023 predicate(n->as_Vector()->length() == 2); 7024 match(Set dst (MulVF dst src)); 7025 format %{ "mulps $dst,$src\t! mul packed2F" %} 7026 ins_encode %{ 7027 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 7028 %} 7029 ins_pipe( pipe_slow ); 7030 %} 7031 7032 instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{ 7033 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7034 match(Set dst (MulVF src1 src2)); 7035 format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %} 7036 ins_encode %{ 7037 int vector_len = 0; 7038 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7039 %} 7040 ins_pipe( pipe_slow ); 7041 %} 7042 7043 instruct vmul2F_mem(vecD dst, vecD src, memory mem) %{ 7044 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7045 match(Set dst (MulVF src (LoadVector mem))); 7046 format %{ "vmulps $dst,$src,$mem\t! mul packed2F" %} 7047 ins_encode %{ 7048 int vector_len = 0; 7049 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7050 %} 7051 ins_pipe( pipe_slow ); 7052 %} 7053 7054 instruct vmul4F(vecX dst, vecX src) %{ 7055 predicate(n->as_Vector()->length() == 4); 7056 match(Set dst (MulVF dst src)); 7057 format %{ "mulps $dst,$src\t! mul packed4F" %} 7058 ins_encode %{ 7059 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 7060 %} 7061 ins_pipe( pipe_slow ); 7062 %} 7063 7064 instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{ 7065 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7066 match(Set dst (MulVF src1 src2)); 7067 format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %} 7068 ins_encode %{ 7069 int vector_len = 0; 7070 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7071 %} 7072 ins_pipe( pipe_slow ); 7073 %} 7074 7075 instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{ 7076 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7077 match(Set dst (MulVF src (LoadVector mem))); 7078 format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %} 7079 ins_encode %{ 7080 int vector_len = 0; 7081 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7082 %} 7083 ins_pipe( pipe_slow ); 7084 %} 7085 7086 instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{ 7087 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7088 match(Set dst (MulVF src1 src2)); 7089 format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %} 7090 ins_encode %{ 7091 int vector_len = 1; 7092 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7093 %} 7094 ins_pipe( pipe_slow ); 7095 %} 7096 7097 instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{ 7098 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7099 match(Set dst (MulVF src (LoadVector mem))); 7100 format %{ "vmulps $dst,$src,$mem\t! 
mul packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector mul
instruct vmul2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD dst src));
  format %{ "mulpd $dst,$src\t! mul packed2D" %}
  ins_encode %{
    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- DIV --------------------------------------

// Floats vector div
instruct vdiv2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed2F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed4F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector div
instruct vdiv2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD dst src));
  format %{ "divpd $dst,$src\t! div packed2D" %}
  ins_encode %{
    __ divpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ Shift ---------------------------------------

// Left and right shift count vectors are the same on x86
// (only the lowest bits of the xmm register are used for the count).
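// For example (a minimal MacroAssembler-style sketch; the registers named
// here are illustrative, not taken from any particular rule): a single movdl
// of the count can feed shifts in either direction, because the hardware
// reads only the low bits of the count register. The vshiftcnt rule below
// loads the count exactly once for this reason.
//
//   __ movdl(xmm0, rcx);    // load shift count into the low bits of xmm0
//   __ psllw(xmm1, xmm0);   // left shift by that count ...
//   __ psrlw(xmm2, xmm0);   // ... and logical right shift by the same count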
7399 instruct vshiftcnt(vecS dst, rRegI cnt) %{ 7400 match(Set dst (LShiftCntV cnt)); 7401 match(Set dst (RShiftCntV cnt)); 7402 format %{ "movd $dst,$cnt\t! load shift count" %} 7403 ins_encode %{ 7404 __ movdl($dst$$XMMRegister, $cnt$$Register); 7405 %} 7406 ins_pipe( pipe_slow ); 7407 %} 7408 7409 // ------------------------------ LeftShift ----------------------------------- 7410 7411 // Shorts/Chars vector left shift 7412 instruct vsll2S(vecS dst, vecS shift) %{ 7413 predicate(n->as_Vector()->length() == 2); 7414 match(Set dst (LShiftVS dst shift)); 7415 format %{ "psllw $dst,$shift\t! left shift packed2S" %} 7416 ins_encode %{ 7417 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 7418 %} 7419 ins_pipe( pipe_slow ); 7420 %} 7421 7422 instruct vsll2S_imm(vecS dst, immI8 shift) %{ 7423 predicate(n->as_Vector()->length() == 2); 7424 match(Set dst (LShiftVS dst shift)); 7425 format %{ "psllw $dst,$shift\t! left shift packed2S" %} 7426 ins_encode %{ 7427 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 7428 %} 7429 ins_pipe( pipe_slow ); 7430 %} 7431 7432 instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{ 7433 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7434 match(Set dst (LShiftVS src shift)); 7435 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 7436 ins_encode %{ 7437 int vector_len = 0; 7438 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 7439 %} 7440 ins_pipe( pipe_slow ); 7441 %} 7442 7443 instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ 7444 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7445 match(Set dst (LShiftVS src shift)); 7446 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 7447 ins_encode %{ 7448 int vector_len = 0; 7449 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 7450 %} 7451 ins_pipe( pipe_slow ); 7452 %} 7453 7454 instruct vsll4S(vecD dst, vecS shift) %{ 7455 predicate(n->as_Vector()->length() == 4); 7456 match(Set dst (LShiftVS dst shift)); 7457 format %{ "psllw $dst,$shift\t! left shift packed4S" %} 7458 ins_encode %{ 7459 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 7460 %} 7461 ins_pipe( pipe_slow ); 7462 %} 7463 7464 instruct vsll4S_imm(vecD dst, immI8 shift) %{ 7465 predicate(n->as_Vector()->length() == 4); 7466 match(Set dst (LShiftVS dst shift)); 7467 format %{ "psllw $dst,$shift\t! left shift packed4S" %} 7468 ins_encode %{ 7469 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 7470 %} 7471 ins_pipe( pipe_slow ); 7472 %} 7473 7474 instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{ 7475 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7476 match(Set dst (LShiftVS src shift)); 7477 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 7478 ins_encode %{ 7479 int vector_len = 0; 7480 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 7481 %} 7482 ins_pipe( pipe_slow ); 7483 %} 7484 7485 instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ 7486 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7487 match(Set dst (LShiftVS src shift)); 7488 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 7489 ins_encode %{ 7490 int vector_len = 0; 7491 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 7492 %} 7493 ins_pipe( pipe_slow ); 7494 %} 7495 7496 instruct vsll8S(vecX dst, vecS shift) %{ 7497 predicate(n->as_Vector()->length() == 8); 7498 match(Set dst (LShiftVS dst shift)); 7499 format %{ "psllw $dst,$shift\t! 
left shift packed8S" %} 7500 ins_encode %{ 7501 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 7502 %} 7503 ins_pipe( pipe_slow ); 7504 %} 7505 7506 instruct vsll8S_imm(vecX dst, immI8 shift) %{ 7507 predicate(n->as_Vector()->length() == 8); 7508 match(Set dst (LShiftVS dst shift)); 7509 format %{ "psllw $dst,$shift\t! left shift packed8S" %} 7510 ins_encode %{ 7511 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 7512 %} 7513 ins_pipe( pipe_slow ); 7514 %} 7515 7516 instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{ 7517 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7518 match(Set dst (LShiftVS src shift)); 7519 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 7520 ins_encode %{ 7521 int vector_len = 0; 7522 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 7523 %} 7524 ins_pipe( pipe_slow ); 7525 %} 7526 7527 instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ 7528 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7529 match(Set dst (LShiftVS src shift)); 7530 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 7531 ins_encode %{ 7532 int vector_len = 0; 7533 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 7534 %} 7535 ins_pipe( pipe_slow ); 7536 %} 7537 7538 instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{ 7539 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 7540 match(Set dst (LShiftVS src shift)); 7541 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 7542 ins_encode %{ 7543 int vector_len = 1; 7544 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 7545 %} 7546 ins_pipe( pipe_slow ); 7547 %} 7548 7549 instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ 7550 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 7551 match(Set dst (LShiftVS src shift)); 7552 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 7553 ins_encode %{ 7554 int vector_len = 1; 7555 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 7556 %} 7557 ins_pipe( pipe_slow ); 7558 %} 7559 7560 instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{ 7561 predicate(UseAVX > 2 && n->as_Vector()->length() == 32); 7562 match(Set dst (LShiftVS src shift)); 7563 format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %} 7564 ins_encode %{ 7565 int vector_len = 2; 7566 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 7567 %} 7568 ins_pipe( pipe_slow ); 7569 %} 7570 7571 instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 7572 predicate(UseAVX > 2 && n->as_Vector()->length() == 32); 7573 match(Set dst (LShiftVS src shift)); 7574 format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %} 7575 ins_encode %{ 7576 int vector_len = 2; 7577 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 7578 %} 7579 ins_pipe( pipe_slow ); 7580 %} 7581 7582 // Integers vector left shift 7583 instruct vsll2I(vecD dst, vecS shift) %{ 7584 predicate(n->as_Vector()->length() == 2); 7585 match(Set dst (LShiftVI dst shift)); 7586 format %{ "pslld $dst,$shift\t! left shift packed2I" %} 7587 ins_encode %{ 7588 __ pslld($dst$$XMMRegister, $shift$$XMMRegister); 7589 %} 7590 ins_pipe( pipe_slow ); 7591 %} 7592 7593 instruct vsll2I_imm(vecD dst, immI8 shift) %{ 7594 predicate(n->as_Vector()->length() == 2); 7595 match(Set dst (LShiftVI dst shift)); 7596 format %{ "pslld $dst,$shift\t! 
left shift packed2I" %} 7597 ins_encode %{ 7598 __ pslld($dst$$XMMRegister, (int)$shift$$constant); 7599 %} 7600 ins_pipe( pipe_slow ); 7601 %} 7602 7603 instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{ 7604 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7605 match(Set dst (LShiftVI src shift)); 7606 format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %} 7607 ins_encode %{ 7608 int vector_len = 0; 7609 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 7610 %} 7611 ins_pipe( pipe_slow ); 7612 %} 7613 7614 instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 7615 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7616 match(Set dst (LShiftVI src shift)); 7617 format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %} 7618 ins_encode %{ 7619 int vector_len = 0; 7620 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 7621 %} 7622 ins_pipe( pipe_slow ); 7623 %} 7624 7625 instruct vsll4I(vecX dst, vecS shift) %{ 7626 predicate(n->as_Vector()->length() == 4); 7627 match(Set dst (LShiftVI dst shift)); 7628 format %{ "pslld $dst,$shift\t! left shift packed4I" %} 7629 ins_encode %{ 7630 __ pslld($dst$$XMMRegister, $shift$$XMMRegister); 7631 %} 7632 ins_pipe( pipe_slow ); 7633 %} 7634 7635 instruct vsll4I_imm(vecX dst, immI8 shift) %{ 7636 predicate(n->as_Vector()->length() == 4); 7637 match(Set dst (LShiftVI dst shift)); 7638 format %{ "pslld $dst,$shift\t! left shift packed4I" %} 7639 ins_encode %{ 7640 __ pslld($dst$$XMMRegister, (int)$shift$$constant); 7641 %} 7642 ins_pipe( pipe_slow ); 7643 %} 7644 7645 instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{ 7646 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7647 match(Set dst (LShiftVI src shift)); 7648 format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %} 7649 ins_encode %{ 7650 int vector_len = 0; 7651 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 7652 %} 7653 ins_pipe( pipe_slow ); 7654 %} 7655 7656 instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 7657 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7658 match(Set dst (LShiftVI src shift)); 7659 format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %} 7660 ins_encode %{ 7661 int vector_len = 0; 7662 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 7663 %} 7664 ins_pipe( pipe_slow ); 7665 %} 7666 7667 instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{ 7668 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 7669 match(Set dst (LShiftVI src shift)); 7670 format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} 7671 ins_encode %{ 7672 int vector_len = 1; 7673 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 7674 %} 7675 ins_pipe( pipe_slow ); 7676 %} 7677 7678 instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 7679 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 7680 match(Set dst (LShiftVI src shift)); 7681 format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} 7682 ins_encode %{ 7683 int vector_len = 1; 7684 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 7685 %} 7686 ins_pipe( pipe_slow ); 7687 %} 7688 7689 instruct vsll16I_reg(vecZ dst, vecZ src, vecS shift) %{ 7690 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7691 match(Set dst (LShiftVI src shift)); 7692 format %{ "vpslld $dst,$src,$shift\t! 
left shift packed16I" %} 7693 ins_encode %{ 7694 int vector_len = 2; 7695 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 7696 %} 7697 ins_pipe( pipe_slow ); 7698 %} 7699 7700 instruct vsll16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 7701 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7702 match(Set dst (LShiftVI src shift)); 7703 format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %} 7704 ins_encode %{ 7705 int vector_len = 2; 7706 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 7707 %} 7708 ins_pipe( pipe_slow ); 7709 %} 7710 7711 // Longs vector left shift 7712 instruct vsll2L(vecX dst, vecS shift) %{ 7713 predicate(n->as_Vector()->length() == 2); 7714 match(Set dst (LShiftVL dst shift)); 7715 format %{ "psllq $dst,$shift\t! left shift packed2L" %} 7716 ins_encode %{ 7717 __ psllq($dst$$XMMRegister, $shift$$XMMRegister); 7718 %} 7719 ins_pipe( pipe_slow ); 7720 %} 7721 7722 instruct vsll2L_imm(vecX dst, immI8 shift) %{ 7723 predicate(n->as_Vector()->length() == 2); 7724 match(Set dst (LShiftVL dst shift)); 7725 format %{ "psllq $dst,$shift\t! left shift packed2L" %} 7726 ins_encode %{ 7727 __ psllq($dst$$XMMRegister, (int)$shift$$constant); 7728 %} 7729 ins_pipe( pipe_slow ); 7730 %} 7731 7732 instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{ 7733 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7734 match(Set dst (LShiftVL src shift)); 7735 format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %} 7736 ins_encode %{ 7737 int vector_len = 0; 7738 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 7739 %} 7740 ins_pipe( pipe_slow ); 7741 %} 7742 7743 instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ 7744 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7745 match(Set dst (LShiftVL src shift)); 7746 format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %} 7747 ins_encode %{ 7748 int vector_len = 0; 7749 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 7750 %} 7751 ins_pipe( pipe_slow ); 7752 %} 7753 7754 instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{ 7755 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 7756 match(Set dst (LShiftVL src shift)); 7757 format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %} 7758 ins_encode %{ 7759 int vector_len = 1; 7760 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 7761 %} 7762 ins_pipe( pipe_slow ); 7763 %} 7764 7765 instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{ 7766 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 7767 match(Set dst (LShiftVL src shift)); 7768 format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %} 7769 ins_encode %{ 7770 int vector_len = 1; 7771 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 7772 %} 7773 ins_pipe( pipe_slow ); 7774 %} 7775 7776 instruct vsll8L_reg(vecZ dst, vecZ src, vecS shift) %{ 7777 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 7778 match(Set dst (LShiftVL src shift)); 7779 format %{ "vpsllq $dst,$src,$shift\t! 
left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ----------------------- LogicalRightShift -----------------------------------

// Shorts vector logical right shift produces an incorrect Java result for
// negative data, because Java code converts a short value into an int with
// sign extension before the shift. Char vectors are fine, though, since chars
// are unsigned values.
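// A worked example of that mismatch (a minimal C++ sketch of the Java
// semantics; the concrete values are illustrative only):
//
//   int16_t s = -256;                                  // 0xFF00 as a Java short
//   int32_t r = (int32_t)((uint32_t)(int32_t)s >> 8);  // Java's (s >>> 8)
//   // r == 0x00FFFFFF: the short is sign-extended to int before the shift,
//   // while psrlw on a 16-bit lane would yield 0x00FF. Char lanes are
//   // zero-extended, so the packed and scalar results agree for chars.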
instruct vsrl2S(vecS dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_imm(vecS dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t!
logical right shift packed32S" %} 7957 ins_encode %{ 7958 int vector_len = 2; 7959 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 7960 %} 7961 ins_pipe( pipe_slow ); 7962 %} 7963 7964 instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 7965 predicate(UseAVX > 2 && n->as_Vector()->length() == 32); 7966 match(Set dst (URShiftVS src shift)); 7967 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %} 7968 ins_encode %{ 7969 int vector_len = 2; 7970 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 7971 %} 7972 ins_pipe( pipe_slow ); 7973 %} 7974 7975 // Integers vector logical right shift 7976 instruct vsrl2I(vecD dst, vecS shift) %{ 7977 predicate(n->as_Vector()->length() == 2); 7978 match(Set dst (URShiftVI dst shift)); 7979 format %{ "psrld $dst,$shift\t! logical right shift packed2I" %} 7980 ins_encode %{ 7981 __ psrld($dst$$XMMRegister, $shift$$XMMRegister); 7982 %} 7983 ins_pipe( pipe_slow ); 7984 %} 7985 7986 instruct vsrl2I_imm(vecD dst, immI8 shift) %{ 7987 predicate(n->as_Vector()->length() == 2); 7988 match(Set dst (URShiftVI dst shift)); 7989 format %{ "psrld $dst,$shift\t! logical right shift packed2I" %} 7990 ins_encode %{ 7991 __ psrld($dst$$XMMRegister, (int)$shift$$constant); 7992 %} 7993 ins_pipe( pipe_slow ); 7994 %} 7995 7996 instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{ 7997 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7998 match(Set dst (URShiftVI src shift)); 7999 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} 8000 ins_encode %{ 8001 int vector_len = 0; 8002 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8003 %} 8004 ins_pipe( pipe_slow ); 8005 %} 8006 8007 instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 8008 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8009 match(Set dst (URShiftVI src shift)); 8010 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} 8011 ins_encode %{ 8012 int vector_len = 0; 8013 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8014 %} 8015 ins_pipe( pipe_slow ); 8016 %} 8017 8018 instruct vsrl4I(vecX dst, vecS shift) %{ 8019 predicate(n->as_Vector()->length() == 4); 8020 match(Set dst (URShiftVI dst shift)); 8021 format %{ "psrld $dst,$shift\t! logical right shift packed4I" %} 8022 ins_encode %{ 8023 __ psrld($dst$$XMMRegister, $shift$$XMMRegister); 8024 %} 8025 ins_pipe( pipe_slow ); 8026 %} 8027 8028 instruct vsrl4I_imm(vecX dst, immI8 shift) %{ 8029 predicate(n->as_Vector()->length() == 4); 8030 match(Set dst (URShiftVI dst shift)); 8031 format %{ "psrld $dst,$shift\t! logical right shift packed4I" %} 8032 ins_encode %{ 8033 __ psrld($dst$$XMMRegister, (int)$shift$$constant); 8034 %} 8035 ins_pipe( pipe_slow ); 8036 %} 8037 8038 instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{ 8039 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8040 match(Set dst (URShiftVI src shift)); 8041 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %} 8042 ins_encode %{ 8043 int vector_len = 0; 8044 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8045 %} 8046 ins_pipe( pipe_slow ); 8047 %} 8048 8049 instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 8050 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8051 match(Set dst (URShiftVI src shift)); 8052 format %{ "vpsrld $dst,$src,$shift\t! 
logical right shift packed4I" %} 8053 ins_encode %{ 8054 int vector_len = 0; 8055 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8056 %} 8057 ins_pipe( pipe_slow ); 8058 %} 8059 8060 instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{ 8061 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 8062 match(Set dst (URShiftVI src shift)); 8063 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %} 8064 ins_encode %{ 8065 int vector_len = 1; 8066 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8067 %} 8068 ins_pipe( pipe_slow ); 8069 %} 8070 8071 instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 8072 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 8073 match(Set dst (URShiftVI src shift)); 8074 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %} 8075 ins_encode %{ 8076 int vector_len = 1; 8077 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8078 %} 8079 ins_pipe( pipe_slow ); 8080 %} 8081 8082 instruct vsrl16I_reg(vecZ dst, vecZ src, vecS shift) %{ 8083 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8084 match(Set dst (URShiftVI src shift)); 8085 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %} 8086 ins_encode %{ 8087 int vector_len = 2; 8088 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8089 %} 8090 ins_pipe( pipe_slow ); 8091 %} 8092 8093 instruct vsrl16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 8094 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8095 match(Set dst (URShiftVI src shift)); 8096 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %} 8097 ins_encode %{ 8098 int vector_len = 2; 8099 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8100 %} 8101 ins_pipe( pipe_slow ); 8102 %} 8103 8104 // Longs vector logical right shift 8105 instruct vsrl2L(vecX dst, vecS shift) %{ 8106 predicate(n->as_Vector()->length() == 2); 8107 match(Set dst (URShiftVL dst shift)); 8108 format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %} 8109 ins_encode %{ 8110 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); 8111 %} 8112 ins_pipe( pipe_slow ); 8113 %} 8114 8115 instruct vsrl2L_imm(vecX dst, immI8 shift) %{ 8116 predicate(n->as_Vector()->length() == 2); 8117 match(Set dst (URShiftVL dst shift)); 8118 format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %} 8119 ins_encode %{ 8120 __ psrlq($dst$$XMMRegister, (int)$shift$$constant); 8121 %} 8122 ins_pipe( pipe_slow ); 8123 %} 8124 8125 instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{ 8126 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8127 match(Set dst (URShiftVL src shift)); 8128 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %} 8129 ins_encode %{ 8130 int vector_len = 0; 8131 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8132 %} 8133 ins_pipe( pipe_slow ); 8134 %} 8135 8136 instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ 8137 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8138 match(Set dst (URShiftVL src shift)); 8139 format %{ "vpsrlq $dst,$src,$shift\t! 
logical right shift packed2L" %} 8140 ins_encode %{ 8141 int vector_len = 0; 8142 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8143 %} 8144 ins_pipe( pipe_slow ); 8145 %} 8146 8147 instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{ 8148 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 8149 match(Set dst (URShiftVL src shift)); 8150 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %} 8151 ins_encode %{ 8152 int vector_len = 1; 8153 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8154 %} 8155 ins_pipe( pipe_slow ); 8156 %} 8157 8158 instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{ 8159 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 8160 match(Set dst (URShiftVL src shift)); 8161 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %} 8162 ins_encode %{ 8163 int vector_len = 1; 8164 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8165 %} 8166 ins_pipe( pipe_slow ); 8167 %} 8168 8169 instruct vsrl8L_reg(vecZ dst, vecZ src, vecS shift) %{ 8170 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8171 match(Set dst (URShiftVL src shift)); 8172 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %} 8173 ins_encode %{ 8174 int vector_len = 2; 8175 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8176 %} 8177 ins_pipe( pipe_slow ); 8178 %} 8179 8180 instruct vsrl8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 8181 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8182 match(Set dst (URShiftVL src shift)); 8183 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %} 8184 ins_encode %{ 8185 int vector_len = 2; 8186 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8187 %} 8188 ins_pipe( pipe_slow ); 8189 %} 8190 8191 // ------------------- ArithmeticRightShift ----------------------------------- 8192 8193 // Shorts/Chars vector arithmetic right shift 8194 instruct vsra2S(vecS dst, vecS shift) %{ 8195 predicate(n->as_Vector()->length() == 2); 8196 match(Set dst (RShiftVS dst shift)); 8197 format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %} 8198 ins_encode %{ 8199 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); 8200 %} 8201 ins_pipe( pipe_slow ); 8202 %} 8203 8204 instruct vsra2S_imm(vecS dst, immI8 shift) %{ 8205 predicate(n->as_Vector()->length() == 2); 8206 match(Set dst (RShiftVS dst shift)); 8207 format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %} 8208 ins_encode %{ 8209 __ psraw($dst$$XMMRegister, (int)$shift$$constant); 8210 %} 8211 ins_pipe( pipe_slow ); 8212 %} 8213 8214 instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{ 8215 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8216 match(Set dst (RShiftVS src shift)); 8217 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} 8218 ins_encode %{ 8219 int vector_len = 0; 8220 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8221 %} 8222 ins_pipe( pipe_slow ); 8223 %} 8224 8225 instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ 8226 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8227 match(Set dst (RShiftVS src shift)); 8228 format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed2S" %} 8229 ins_encode %{ 8230 int vector_len = 0; 8231 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8232 %} 8233 ins_pipe( pipe_slow ); 8234 %} 8235 8236 instruct vsra4S(vecD dst, vecS shift) %{ 8237 predicate(n->as_Vector()->length() == 4); 8238 match(Set dst (RShiftVS dst shift)); 8239 format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} 8240 ins_encode %{ 8241 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); 8242 %} 8243 ins_pipe( pipe_slow ); 8244 %} 8245 8246 instruct vsra4S_imm(vecD dst, immI8 shift) %{ 8247 predicate(n->as_Vector()->length() == 4); 8248 match(Set dst (RShiftVS dst shift)); 8249 format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} 8250 ins_encode %{ 8251 __ psraw($dst$$XMMRegister, (int)$shift$$constant); 8252 %} 8253 ins_pipe( pipe_slow ); 8254 %} 8255 8256 instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{ 8257 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8258 match(Set dst (RShiftVS src shift)); 8259 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 8260 ins_encode %{ 8261 int vector_len = 0; 8262 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8263 %} 8264 ins_pipe( pipe_slow ); 8265 %} 8266 8267 instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ 8268 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8269 match(Set dst (RShiftVS src shift)); 8270 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 8271 ins_encode %{ 8272 int vector_len = 0; 8273 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8274 %} 8275 ins_pipe( pipe_slow ); 8276 %} 8277 8278 instruct vsra8S(vecX dst, vecS shift) %{ 8279 predicate(n->as_Vector()->length() == 8); 8280 match(Set dst (RShiftVS dst shift)); 8281 format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %} 8282 ins_encode %{ 8283 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); 8284 %} 8285 ins_pipe( pipe_slow ); 8286 %} 8287 8288 instruct vsra8S_imm(vecX dst, immI8 shift) %{ 8289 predicate(n->as_Vector()->length() == 8); 8290 match(Set dst (RShiftVS dst shift)); 8291 format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %} 8292 ins_encode %{ 8293 __ psraw($dst$$XMMRegister, (int)$shift$$constant); 8294 %} 8295 ins_pipe( pipe_slow ); 8296 %} 8297 8298 instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{ 8299 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8300 match(Set dst (RShiftVS src shift)); 8301 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 8302 ins_encode %{ 8303 int vector_len = 0; 8304 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8305 %} 8306 ins_pipe( pipe_slow ); 8307 %} 8308 8309 instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ 8310 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8311 match(Set dst (RShiftVS src shift)); 8312 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 8313 ins_encode %{ 8314 int vector_len = 0; 8315 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8316 %} 8317 ins_pipe( pipe_slow ); 8318 %} 8319 8320 instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{ 8321 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 8322 match(Set dst (RShiftVS src shift)); 8323 format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed16S" %} 8324 ins_encode %{ 8325 int vector_len = 1; 8326 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8327 %} 8328 ins_pipe( pipe_slow ); 8329 %} 8330 8331 instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ 8332 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 8333 match(Set dst (RShiftVS src shift)); 8334 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} 8335 ins_encode %{ 8336 int vector_len = 1; 8337 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8338 %} 8339 ins_pipe( pipe_slow ); 8340 %} 8341 8342 instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{ 8343 predicate(UseAVX > 2 && n->as_Vector()->length() == 32); 8344 match(Set dst (RShiftVS src shift)); 8345 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %} 8346 ins_encode %{ 8347 int vector_len = 2; 8348 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8349 %} 8350 ins_pipe( pipe_slow ); 8351 %} 8352 8353 instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 8354 predicate(UseAVX > 2 && n->as_Vector()->length() == 32); 8355 match(Set dst (RShiftVS src shift)); 8356 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %} 8357 ins_encode %{ 8358 int vector_len = 2; 8359 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8360 %} 8361 ins_pipe( pipe_slow ); 8362 %} 8363 8364 // Integers vector arithmetic right shift 8365 instruct vsra2I(vecD dst, vecS shift) %{ 8366 predicate(n->as_Vector()->length() == 2); 8367 match(Set dst (RShiftVI dst shift)); 8368 format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %} 8369 ins_encode %{ 8370 __ psrad($dst$$XMMRegister, $shift$$XMMRegister); 8371 %} 8372 ins_pipe( pipe_slow ); 8373 %} 8374 8375 instruct vsra2I_imm(vecD dst, immI8 shift) %{ 8376 predicate(n->as_Vector()->length() == 2); 8377 match(Set dst (RShiftVI dst shift)); 8378 format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %} 8379 ins_encode %{ 8380 __ psrad($dst$$XMMRegister, (int)$shift$$constant); 8381 %} 8382 ins_pipe( pipe_slow ); 8383 %} 8384 8385 instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{ 8386 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8387 match(Set dst (RShiftVI src shift)); 8388 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %} 8389 ins_encode %{ 8390 int vector_len = 0; 8391 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8392 %} 8393 ins_pipe( pipe_slow ); 8394 %} 8395 8396 instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 8397 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8398 match(Set dst (RShiftVI src shift)); 8399 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %} 8400 ins_encode %{ 8401 int vector_len = 0; 8402 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8403 %} 8404 ins_pipe( pipe_slow ); 8405 %} 8406 8407 instruct vsra4I(vecX dst, vecS shift) %{ 8408 predicate(n->as_Vector()->length() == 4); 8409 match(Set dst (RShiftVI dst shift)); 8410 format %{ "psrad $dst,$shift\t! 
arithmetic right shift packed4I" %} 8411 ins_encode %{ 8412 __ psrad($dst$$XMMRegister, $shift$$XMMRegister); 8413 %} 8414 ins_pipe( pipe_slow ); 8415 %} 8416 8417 instruct vsra4I_imm(vecX dst, immI8 shift) %{ 8418 predicate(n->as_Vector()->length() == 4); 8419 match(Set dst (RShiftVI dst shift)); 8420 format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %} 8421 ins_encode %{ 8422 __ psrad($dst$$XMMRegister, (int)$shift$$constant); 8423 %} 8424 ins_pipe( pipe_slow ); 8425 %} 8426 8427 instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{ 8428 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8429 match(Set dst (RShiftVI src shift)); 8430 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %} 8431 ins_encode %{ 8432 int vector_len = 0; 8433 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8434 %} 8435 ins_pipe( pipe_slow ); 8436 %} 8437 8438 instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 8439 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8440 match(Set dst (RShiftVI src shift)); 8441 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %} 8442 ins_encode %{ 8443 int vector_len = 0; 8444 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8445 %} 8446 ins_pipe( pipe_slow ); 8447 %} 8448 8449 instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{ 8450 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 8451 match(Set dst (RShiftVI src shift)); 8452 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %} 8453 ins_encode %{ 8454 int vector_len = 1; 8455 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8456 %} 8457 ins_pipe( pipe_slow ); 8458 %} 8459 8460 instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 8461 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 8462 match(Set dst (RShiftVI src shift)); 8463 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %} 8464 ins_encode %{ 8465 int vector_len = 1; 8466 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8467 %} 8468 ins_pipe( pipe_slow ); 8469 %} 8470 8471 instruct vsra16I_reg(vecZ dst, vecZ src, vecS shift) %{ 8472 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8473 match(Set dst (RShiftVI src shift)); 8474 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %} 8475 ins_encode %{ 8476 int vector_len = 2; 8477 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8478 %} 8479 ins_pipe( pipe_slow ); 8480 %} 8481 8482 instruct vsra16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 8483 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8484 match(Set dst (RShiftVI src shift)); 8485 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %} 8486 ins_encode %{ 8487 int vector_len = 2; 8488 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8489 %} 8490 ins_pipe( pipe_slow ); 8491 %} 8492 8493 // There are no longs vector arithmetic right shift instructions. 8494 8495 8496 // --------------------------------- AND -------------------------------------- 8497 8498 instruct vand4B(vecS dst, vecS src) %{ 8499 predicate(n->as_Vector()->length_in_bytes() == 4); 8500 match(Set dst (AndV dst src)); 8501 format %{ "pand $dst,$src\t! 
and vectors (4 bytes)" %} 8502 ins_encode %{ 8503 __ pand($dst$$XMMRegister, $src$$XMMRegister); 8504 %} 8505 ins_pipe( pipe_slow ); 8506 %} 8507 8508 instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{ 8509 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 8510 match(Set dst (AndV src1 src2)); 8511 format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %} 8512 ins_encode %{ 8513 int vector_len = 0; 8514 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8515 %} 8516 ins_pipe( pipe_slow ); 8517 %} 8518 8519 instruct vand4B_mem(vecS dst, vecS src, memory mem) %{ 8520 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 8521 match(Set dst (AndV src (LoadVector mem))); 8522 format %{ "vpand $dst,$src,$mem\t! and vectors (4 bytes)" %} 8523 ins_encode %{ 8524 int vector_len = 0; 8525 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8526 %} 8527 ins_pipe( pipe_slow ); 8528 %} 8529 8530 instruct vand8B(vecD dst, vecD src) %{ 8531 predicate(n->as_Vector()->length_in_bytes() == 8); 8532 match(Set dst (AndV dst src)); 8533 format %{ "pand $dst,$src\t! and vectors (8 bytes)" %} 8534 ins_encode %{ 8535 __ pand($dst$$XMMRegister, $src$$XMMRegister); 8536 %} 8537 ins_pipe( pipe_slow ); 8538 %} 8539 8540 instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{ 8541 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 8542 match(Set dst (AndV src1 src2)); 8543 format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %} 8544 ins_encode %{ 8545 int vector_len = 0; 8546 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8547 %} 8548 ins_pipe( pipe_slow ); 8549 %} 8550 8551 instruct vand8B_mem(vecD dst, vecD src, memory mem) %{ 8552 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 8553 match(Set dst (AndV src (LoadVector mem))); 8554 format %{ "vpand $dst,$src,$mem\t! and vectors (8 bytes)" %} 8555 ins_encode %{ 8556 int vector_len = 0; 8557 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8558 %} 8559 ins_pipe( pipe_slow ); 8560 %} 8561 8562 instruct vand16B(vecX dst, vecX src) %{ 8563 predicate(n->as_Vector()->length_in_bytes() == 16); 8564 match(Set dst (AndV dst src)); 8565 format %{ "pand $dst,$src\t! and vectors (16 bytes)" %} 8566 ins_encode %{ 8567 __ pand($dst$$XMMRegister, $src$$XMMRegister); 8568 %} 8569 ins_pipe( pipe_slow ); 8570 %} 8571 8572 instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{ 8573 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 8574 match(Set dst (AndV src1 src2)); 8575 format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %} 8576 ins_encode %{ 8577 int vector_len = 0; 8578 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8579 %} 8580 ins_pipe( pipe_slow ); 8581 %} 8582 8583 instruct vand16B_mem(vecX dst, vecX src, memory mem) %{ 8584 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 8585 match(Set dst (AndV src (LoadVector mem))); 8586 format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %} 8587 ins_encode %{ 8588 int vector_len = 0; 8589 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8590 %} 8591 ins_pipe( pipe_slow ); 8592 %} 8593 8594 instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{ 8595 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 8596 match(Set dst (AndV src1 src2)); 8597 format %{ "vpand $dst,$src1,$src2\t! 
and vectors (32 bytes)" %} 8598 ins_encode %{ 8599 int vector_len = 1; 8600 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8601 %} 8602 ins_pipe( pipe_slow ); 8603 %} 8604 8605 instruct vand32B_mem(vecY dst, vecY src, memory mem) %{ 8606 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 8607 match(Set dst (AndV src (LoadVector mem))); 8608 format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %} 8609 ins_encode %{ 8610 int vector_len = 1; 8611 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8612 %} 8613 ins_pipe( pipe_slow ); 8614 %} 8615 8616 instruct vand64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ 8617 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); 8618 match(Set dst (AndV src1 src2)); 8619 format %{ "vpand $dst,$src1,$src2\t! and vectors (64 bytes)" %} 8620 ins_encode %{ 8621 int vector_len = 2; 8622 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8623 %} 8624 ins_pipe( pipe_slow ); 8625 %} 8626 8627 instruct vand64B_mem(vecZ dst, vecZ src, memory mem) %{ 8628 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); 8629 match(Set dst (AndV src (LoadVector mem))); 8630 format %{ "vpand $dst,$src,$mem\t! and vectors (64 bytes)" %} 8631 ins_encode %{ 8632 int vector_len = 2; 8633 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8634 %} 8635 ins_pipe( pipe_slow ); 8636 %} 8637 8638 // --------------------------------- OR --------------------------------------- 8639 8640 instruct vor4B(vecS dst, vecS src) %{ 8641 predicate(n->as_Vector()->length_in_bytes() == 4); 8642 match(Set dst (OrV dst src)); 8643 format %{ "por $dst,$src\t! or vectors (4 bytes)" %} 8644 ins_encode %{ 8645 __ por($dst$$XMMRegister, $src$$XMMRegister); 8646 %} 8647 ins_pipe( pipe_slow ); 8648 %} 8649 8650 instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{ 8651 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 8652 match(Set dst (OrV src1 src2)); 8653 format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %} 8654 ins_encode %{ 8655 int vector_len = 0; 8656 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8657 %} 8658 ins_pipe( pipe_slow ); 8659 %} 8660 8661 instruct vor4B_mem(vecS dst, vecS src, memory mem) %{ 8662 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 8663 match(Set dst (OrV src (LoadVector mem))); 8664 format %{ "vpor $dst,$src,$mem\t! or vectors (4 bytes)" %} 8665 ins_encode %{ 8666 int vector_len = 0; 8667 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8668 %} 8669 ins_pipe( pipe_slow ); 8670 %} 8671 8672 instruct vor8B(vecD dst, vecD src) %{ 8673 predicate(n->as_Vector()->length_in_bytes() == 8); 8674 match(Set dst (OrV dst src)); 8675 format %{ "por $dst,$src\t! or vectors (8 bytes)" %} 8676 ins_encode %{ 8677 __ por($dst$$XMMRegister, $src$$XMMRegister); 8678 %} 8679 ins_pipe( pipe_slow ); 8680 %} 8681 8682 instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{ 8683 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 8684 match(Set dst (OrV src1 src2)); 8685 format %{ "vpor $dst,$src1,$src2\t! 
// --------------------------------- OR ---------------------------------------

instruct vor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV dst src));
  format %{ "por     $dst,$src\t! or vectors (4 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src1 src2));
  format %{ "vpor    $dst,$src1,$src2\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor    $dst,$src,$mem\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV dst src));
  format %{ "por     $dst,$src\t! or vectors (8 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  format %{ "vpor    $dst,$src1,$src2\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_mem(vecD dst, vecD src, memory mem) %{
  // Fixed: the predicate must check for 8 bytes to match the vecD operand,
  // as in vand8B_mem above (it previously checked == 4).
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor    $dst,$src,$mem\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV dst src));
  format %{ "por     $dst,$src\t! or vectors (16 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  format %{ "vpor    $dst,$src1,$src2\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor    $dst,$src,$mem\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src1 src2));
  format %{ "vpor    $dst,$src1,$src2\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor    $dst,$src,$mem\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src1 src2));
  format %{ "vpor    $dst,$src1,$src2\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor    $dst,$src,$mem\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
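
// The XOR rules below follow the same three-rule pattern used for AND and OR
// above: an SSE form that operates in place on dst (Set dst (XorV dst src)),
// an AVX three-operand register form, and an AVX form that folds the second
// operand in from memory via LoadVector.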
// --------------------------------- XOR --------------------------------------

instruct vxor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV dst src));
  format %{ "pxor    $dst,$src\t! xor vectors (4 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor   $dst,$src,$mem\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV dst src));
  format %{ "pxor    $dst,$src\t! xor vectors (8 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor   $dst,$src,$mem\t! xor vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV dst src));
  format %{ "pxor    $dst,$src\t! xor vectors (16 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor   $dst,$src,$mem\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor   $dst,$src,$mem\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor   $dst,$src,$mem\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
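
// For reference, loops like the following are a typical source of the
// AndV/OrV/XorV ideal nodes matched above, assuming C2's SuperWord pass
// succeeds in vectorizing them (the Java method below is illustrative, not
// part of this file):
//
//   static void andBytes(byte[] a, byte[] b, byte[] r) {
//     for (int i = 0; i < r.length; i++) {
//       r[i] = (byte)(a[i] & b[i]);
//     }
//   }
//
// Depending on UseAVX and the vector size chosen by SuperWord, such a loop
// body is matched by one of the 4/8/16/32/64-byte rules above.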