//
// Copyright (c) 2011, 2014, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers.  512-bit registers of 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre-EVEX architectures:
//      XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX enabled architectures:
//      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
//
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM31 preserved across function calls
//              XMM0-XMM3 might hold parameters
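//
// For example, XMM0 below names word (a) of xmm0, while XMM0b..XMM0p
// name words (b)..(p), i.e. xmm0->as_VMReg()->next(1) through
// xmm0->as_VMReg()->next(15); a Float lives in word (a) alone, a
// Double in the (a,b) pair, and a full 512-bit EVEX vector occupies
// all sixteen words.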
reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

#ifdef _WIN64

reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(15));

reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(15));

#else // _WIN64

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

#ifdef _LP64

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
xmm23->as_VMReg()->next(8)); 924 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9)); 925 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10)); 926 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11)); 927 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12)); 928 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13)); 929 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14)); 930 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15)); 931 932 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()); 933 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1)); 934 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2)); 935 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3)); 936 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4)); 937 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5)); 938 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6)); 939 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7)); 940 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8)); 941 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9)); 942 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10)); 943 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11)); 944 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12)); 945 reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13)); 946 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14)); 947 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15)); 948 949 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()); 950 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1)); 951 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2)); 952 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3)); 953 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4)); 954 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5)); 955 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6)); 956 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7)); 957 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8)); 958 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9)); 959 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10)); 960 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11)); 961 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12)); 962 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13)); 963 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14)); 964 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15)); 965 966 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()); 967 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1)); 968 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2)); 969 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3)); 970 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4)); 971 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5)); 972 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6)); 973 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7)); 974 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8)); 975 reg_def XMM26j( SOC, SOC, Op_RegF, 26, 
xmm26->as_VMReg()->next(9)); 976 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10)); 977 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11)); 978 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12)); 979 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13)); 980 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14)); 981 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15)); 982 983 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()); 984 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1)); 985 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2)); 986 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3)); 987 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4)); 988 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5)); 989 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6)); 990 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7)); 991 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8)); 992 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9)); 993 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10)); 994 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11)); 995 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12)); 996 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13)); 997 reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14)); 998 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15)); 999 1000 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()); 1001 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1)); 1002 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2)); 1003 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3)); 1004 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4)); 1005 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5)); 1006 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6)); 1007 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7)); 1008 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8)); 1009 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9)); 1010 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10)); 1011 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11)); 1012 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12)); 1013 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13)); 1014 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14)); 1015 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15)); 1016 1017 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()); 1018 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1)); 1019 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2)); 1020 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3)); 1021 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4)); 1022 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5)); 1023 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6)); 1024 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7)); 1025 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8)); 1026 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9)); 1027 reg_def XMM29k( SOC, SOC, Op_RegF, 
29, xmm29->as_VMReg()->next(10)); 1028 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11)); 1029 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12)); 1030 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13)); 1031 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14)); 1032 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15)); 1033 1034 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()); 1035 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1)); 1036 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2)); 1037 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3)); 1038 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4)); 1039 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5)); 1040 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6)); 1041 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7)); 1042 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8)); 1043 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9)); 1044 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10)); 1045 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11)); 1046 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12)); 1047 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13)); 1048 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14)); 1049 reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15)); 1050 1051 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()); 1052 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1)); 1053 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2)); 1054 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3)); 1055 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4)); 1056 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5)); 1057 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6)); 1058 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7)); 1059 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8)); 1060 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9)); 1061 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10)); 1062 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11)); 1063 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12)); 1064 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13)); 1065 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14)); 1066 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15)); 1067 1068 #endif // _LP64 1069 1070 #endif // _WIN64 1071 1072 #ifdef _LP64 1073 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad()); 1074 #else 1075 reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad()); 1076 #endif // _LP64 1077 1078 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1079 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1080 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1081 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1082 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, 
XMM4o, XMM4p, 1083 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1084 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1085 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1086 #ifdef _LP64 1087 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1088 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1089 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1090 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1091 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1092 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1093 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1094 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1095 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 1096 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 1097 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 1098 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 1099 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 1100 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 1101 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 1102 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 1103 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 1104 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 1105 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 1106 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 1107 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 1108 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 1109 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1110 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1111 #endif 1112 ); 1113 1114 // flags 
allocation class should be last. 1115 alloc_class chunk2(RFLAGS); 1116 1117 // Singleton class for condition codes 1118 reg_class int_flags(RFLAGS); 1119 1120 // Class for pre evex float registers 1121 reg_class float_reg_legacy(XMM0, 1122 XMM1, 1123 XMM2, 1124 XMM3, 1125 XMM4, 1126 XMM5, 1127 XMM6, 1128 XMM7 1129 #ifdef _LP64 1130 ,XMM8, 1131 XMM9, 1132 XMM10, 1133 XMM11, 1134 XMM12, 1135 XMM13, 1136 XMM14, 1137 XMM15 1138 #endif 1139 ); 1140 1141 // Class for evex float registers 1142 reg_class float_reg_evex(XMM0, 1143 XMM1, 1144 XMM2, 1145 XMM3, 1146 XMM4, 1147 XMM5, 1148 XMM6, 1149 XMM7 1150 #ifdef _LP64 1151 ,XMM8, 1152 XMM9, 1153 XMM10, 1154 XMM11, 1155 XMM12, 1156 XMM13, 1157 XMM14, 1158 XMM15, 1159 XMM16, 1160 XMM17, 1161 XMM18, 1162 XMM19, 1163 XMM20, 1164 XMM21, 1165 XMM22, 1166 XMM23, 1167 XMM24, 1168 XMM25, 1169 XMM26, 1170 XMM27, 1171 XMM28, 1172 XMM29, 1173 XMM30, 1174 XMM31 1175 #endif 1176 ); 1177 1178 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} ); 1179 1180 // Class for pre evex double registers 1181 reg_class double_reg_legacy(XMM0, XMM0b, 1182 XMM1, XMM1b, 1183 XMM2, XMM2b, 1184 XMM3, XMM3b, 1185 XMM4, XMM4b, 1186 XMM5, XMM5b, 1187 XMM6, XMM6b, 1188 XMM7, XMM7b 1189 #ifdef _LP64 1190 ,XMM8, XMM8b, 1191 XMM9, XMM9b, 1192 XMM10, XMM10b, 1193 XMM11, XMM11b, 1194 XMM12, XMM12b, 1195 XMM13, XMM13b, 1196 XMM14, XMM14b, 1197 XMM15, XMM15b 1198 #endif 1199 ); 1200 1201 // Class for evex double registers 1202 reg_class double_reg_evex(XMM0, XMM0b, 1203 XMM1, XMM1b, 1204 XMM2, XMM2b, 1205 XMM3, XMM3b, 1206 XMM4, XMM4b, 1207 XMM5, XMM5b, 1208 XMM6, XMM6b, 1209 XMM7, XMM7b 1210 #ifdef _LP64 1211 ,XMM8, XMM8b, 1212 XMM9, XMM9b, 1213 XMM10, XMM10b, 1214 XMM11, XMM11b, 1215 XMM12, XMM12b, 1216 XMM13, XMM13b, 1217 XMM14, XMM14b, 1218 XMM15, XMM15b, 1219 XMM16, XMM16b, 1220 XMM17, XMM17b, 1221 XMM18, XMM18b, 1222 XMM19, XMM19b, 1223 XMM20, XMM20b, 1224 XMM21, XMM21b, 1225 XMM22, XMM22b, 1226 XMM23, XMM23b, 1227 XMM24, XMM24b, 1228 XMM25, XMM25b, 1229 XMM26, XMM26b, 1230 XMM27, XMM27b, 1231 XMM28, XMM28b, 1232 XMM29, XMM29b, 1233 XMM30, XMM30b, 1234 XMM31, XMM31b 1235 #endif 1236 ); 1237 1238 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} ); 1239 1240 // Class for pre evex 32bit vector registers 1241 reg_class vectors_reg_legacy(XMM0, 1242 XMM1, 1243 XMM2, 1244 XMM3, 1245 XMM4, 1246 XMM5, 1247 XMM6, 1248 XMM7 1249 #ifdef _LP64 1250 ,XMM8, 1251 XMM9, 1252 XMM10, 1253 XMM11, 1254 XMM12, 1255 XMM13, 1256 XMM14, 1257 XMM15 1258 #endif 1259 ); 1260 1261 // Class for evex 32bit vector registers 1262 reg_class vectors_reg_evex(XMM0, 1263 XMM1, 1264 XMM2, 1265 XMM3, 1266 XMM4, 1267 XMM5, 1268 XMM6, 1269 XMM7 1270 #ifdef _LP64 1271 ,XMM8, 1272 XMM9, 1273 XMM10, 1274 XMM11, 1275 XMM12, 1276 XMM13, 1277 XMM14, 1278 XMM15, 1279 XMM16, 1280 XMM17, 1281 XMM18, 1282 XMM19, 1283 XMM20, 1284 XMM21, 1285 XMM22, 1286 XMM23, 1287 XMM24, 1288 XMM25, 1289 XMM26, 1290 XMM27, 1291 XMM28, 1292 XMM29, 1293 XMM30, 1294 XMM31 1295 #endif 1296 ); 1297 1298 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} ); 1299 1300 // Class for all 64bit vector registers 1301 reg_class vectord_reg_legacy(XMM0, XMM0b, 1302 XMM1, XMM1b, 1303 XMM2, XMM2b, 1304 XMM3, XMM3b, 1305 XMM4, XMM4b, 1306 XMM5, XMM5b, 1307 XMM6, XMM6b, 1308 XMM7, XMM7b 1309 #ifdef _LP64 1310 ,XMM8, XMM8b, 1311 XMM9, XMM9b, 1312 XMM10, XMM10b, 1313 XMM11, XMM11b, 1314 XMM12, XMM12b, 1315 XMM13, XMM13b, 1316 
XMM14, XMM14b, 1317 XMM15, XMM15b 1318 #endif 1319 ); 1320 1321 // Class for all 64bit vector registers 1322 reg_class vectord_reg_evex(XMM0, XMM0b, 1323 XMM1, XMM1b, 1324 XMM2, XMM2b, 1325 XMM3, XMM3b, 1326 XMM4, XMM4b, 1327 XMM5, XMM5b, 1328 XMM6, XMM6b, 1329 XMM7, XMM7b 1330 #ifdef _LP64 1331 ,XMM8, XMM8b, 1332 XMM9, XMM9b, 1333 XMM10, XMM10b, 1334 XMM11, XMM11b, 1335 XMM12, XMM12b, 1336 XMM13, XMM13b, 1337 XMM14, XMM14b, 1338 XMM15, XMM15b, 1339 XMM16, XMM16b, 1340 XMM17, XMM17b, 1341 XMM18, XMM18b, 1342 XMM19, XMM19b, 1343 XMM20, XMM20b, 1344 XMM21, XMM21b, 1345 XMM22, XMM22b, 1346 XMM23, XMM23b, 1347 XMM24, XMM24b, 1348 XMM25, XMM25b, 1349 XMM26, XMM26b, 1350 XMM27, XMM27b, 1351 XMM28, XMM28b, 1352 XMM29, XMM29b, 1353 XMM30, XMM30b, 1354 XMM31, XMM31b 1355 #endif 1356 ); 1357 1358 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} ); 1359 1360 // Class for all 128bit vector registers 1361 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, 1362 XMM1, XMM1b, XMM1c, XMM1d, 1363 XMM2, XMM2b, XMM2c, XMM2d, 1364 XMM3, XMM3b, XMM3c, XMM3d, 1365 XMM4, XMM4b, XMM4c, XMM4d, 1366 XMM5, XMM5b, XMM5c, XMM5d, 1367 XMM6, XMM6b, XMM6c, XMM6d, 1368 XMM7, XMM7b, XMM7c, XMM7d 1369 #ifdef _LP64 1370 ,XMM8, XMM8b, XMM8c, XMM8d, 1371 XMM9, XMM9b, XMM9c, XMM9d, 1372 XMM10, XMM10b, XMM10c, XMM10d, 1373 XMM11, XMM11b, XMM11c, XMM11d, 1374 XMM12, XMM12b, XMM12c, XMM12d, 1375 XMM13, XMM13b, XMM13c, XMM13d, 1376 XMM14, XMM14b, XMM14c, XMM14d, 1377 XMM15, XMM15b, XMM15c, XMM15d 1378 #endif 1379 ); 1380 1381 // Class for all 128bit vector registers 1382 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, 1383 XMM1, XMM1b, XMM1c, XMM1d, 1384 XMM2, XMM2b, XMM2c, XMM2d, 1385 XMM3, XMM3b, XMM3c, XMM3d, 1386 XMM4, XMM4b, XMM4c, XMM4d, 1387 XMM5, XMM5b, XMM5c, XMM5d, 1388 XMM6, XMM6b, XMM6c, XMM6d, 1389 XMM7, XMM7b, XMM7c, XMM7d 1390 #ifdef _LP64 1391 ,XMM8, XMM8b, XMM8c, XMM8d, 1392 XMM9, XMM9b, XMM9c, XMM9d, 1393 XMM10, XMM10b, XMM10c, XMM10d, 1394 XMM11, XMM11b, XMM11c, XMM11d, 1395 XMM12, XMM12b, XMM12c, XMM12d, 1396 XMM13, XMM13b, XMM13c, XMM13d, 1397 XMM14, XMM14b, XMM14c, XMM14d, 1398 XMM15, XMM15b, XMM15c, XMM15d, 1399 XMM16, XMM16b, XMM16c, XMM16d, 1400 XMM17, XMM17b, XMM17c, XMM17d, 1401 XMM18, XMM18b, XMM18c, XMM18d, 1402 XMM19, XMM19b, XMM19c, XMM19d, 1403 XMM20, XMM20b, XMM20c, XMM20d, 1404 XMM21, XMM21b, XMM21c, XMM21d, 1405 XMM22, XMM22b, XMM22c, XMM22d, 1406 XMM23, XMM23b, XMM23c, XMM23d, 1407 XMM24, XMM24b, XMM24c, XMM24d, 1408 XMM25, XMM25b, XMM25c, XMM25d, 1409 XMM26, XMM26b, XMM26c, XMM26d, 1410 XMM27, XMM27b, XMM27c, XMM27d, 1411 XMM28, XMM28b, XMM28c, XMM28d, 1412 XMM29, XMM29b, XMM29c, XMM29d, 1413 XMM30, XMM30b, XMM30c, XMM30d, 1414 XMM31, XMM31b, XMM31c, XMM31d 1415 #endif 1416 ); 1417 1418 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} ); 1419 1420 // Class for all 256bit vector registers 1421 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1422 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1423 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1424 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 1425 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1426 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1427 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1428 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1429 #ifdef _LP64 1430 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1431 XMM9, XMM9b, 
XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1432 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1433 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1434 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1435 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1436 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1437 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h 1438 #endif 1439 ); 1440 1441 // Class for all 256bit vector registers 1442 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1443 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1444 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1445 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 1446 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1447 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1448 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1449 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1450 #ifdef _LP64 1451 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1452 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1453 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1454 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1455 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1456 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1457 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1458 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, 1459 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, 1460 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, 1461 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, 1462 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, 1463 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, 1464 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, 1465 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, 1466 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, 1467 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, 1468 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, 1469 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, 1470 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, 1471 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, 1472 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, 1473 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, 1474 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h 1475 #endif 1476 ); 1477 1478 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} ); 1479 1480 // Class for all 512bit vector registers 1481 reg_class vectorz_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1482 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1483 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1484 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1485 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1486 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, 
XMM5o, XMM5p, 1487 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1488 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1489 #ifdef _LP64 1490 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1491 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1492 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1493 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1494 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1495 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1496 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1497 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1498 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 1499 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 1500 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 1501 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 1502 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 1503 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 1504 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 1505 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 1506 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 1507 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 1508 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 1509 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 1510 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 1511 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 1512 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1513 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1514 #endif 1515 ); 1516 1517 %} 1518 1519 1520 //----------SOURCE BLOCK------------------------------------------------------- 1521 // This is a block of 
C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description

source_hpp %{
// Header information of the source block.
// Method declarations/definitions which are used outside
// the ad-scope can conveniently be defined here.
//
// To keep related declarations/definitions/uses close together,
// we switch between source %{ %} and source_hpp %{ %} freely as needed.

class NativeJump;

class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};

class HandlerImpl {

 public:

  static int emit_exception_handler(CodeBuffer& cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  static uint size_exception_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return NativeJump::instruction_size;
  }

#ifdef _LP64
  static uint size_deopt_handler() {
    // three 5 byte instructions
    return 15;
  }
#else
  static uint size_deopt_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // The deopt handler starts out as a jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return 5 + NativeJump::instruction_size; // pushl(); jmp;
  }
#endif
};

%} // end source_hpp

source %{

// Emit exception handler code.
// Stuff framesize into a register and call a VM stub routine.
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) return 0;  // CodeBuffer::expand failed
  int offset = __ offset();
  __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}

// Emit deopt handler code.
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
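  // Note: the code emitted below must fit in the size_deopt_handler()
  // budget asserted at the end of this method; on LP64 that budget is
  // 15 bytes, i.e. the three 5-byte instructions (call, sub, jmp)
  // mentioned in size_deopt_handler() above.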
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) return 0;  // CodeBuffer::expand failed
  int offset = __ offset();

#ifdef _LP64
  address the_pc = (address) __ pc();
  Label next;
  // push a "the_pc" on the stack without destroying any registers
  // as they all may be live.

  // push address of "next"
  __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
  __ bind(next);
  // adjust it so it matches "the_pc"
  __ subptr(Address(rsp, 0), __ offset() - offset);
#else
  InternalAddress here(__ pc());
  __ pushptr(here.addr());
#endif

  __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
  __ end_a_stub();
  return offset;
}


//=============================================================================

// Float masks come from different places depending on platform.
#ifdef _LP64
  static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
  static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
  static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
  static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
#else
  static address float_signmask()  { return (address)float_signmask_pool; }
  static address float_signflip()  { return (address)float_signflip_pool; }
  static address double_signmask() { return (address)double_signmask_pool; }
  static address double_signflip() { return (address)double_signflip_pool; }
#endif


const bool Matcher::match_rule_supported(int opcode) {
  if (!has_match_rule(opcode))
    return false;

  switch (opcode) {
    case Op_PopCountI:
    case Op_PopCountL:
      if (!UsePopCountInstruction)
        return false;
      break;
    case Op_MulVI:
      if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
        return false;
      break;
    case Op_MulVL:
    case Op_MulReductionVL:
      if (!VM_Version::supports_avx512dq())
        return false;
      break;
    case Op_AddReductionVL:
      if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here
        return false;
      break;
    case Op_AddReductionVI:
      if (UseSSE < 3) // requires at least SSE3
        return false;
      break;
    case Op_MulReductionVI:
      if (UseSSE < 4) // requires at least SSE4
        return false;
      break;
    case Op_AddReductionVF:
    case Op_AddReductionVD:
    case Op_MulReductionVF:
    case Op_MulReductionVD:
      if (UseSSE < 1) // requires at least SSE
        return false;
      break;
    case Op_CompareAndSwapL:
#ifdef _LP64
    case Op_CompareAndSwapP:
#endif
      if (!VM_Version::supports_cx8())
        return false;
      break;
  }

  return true;  // By default, match rules are supported.
}

// Max vector size in bytes. 0 if not supported.
const int Matcher::vector_width_in_bytes(BasicType bt) {
  assert(is_java_primitive(bt), "only primitive type vectors");
  if (UseSSE < 2) return 0;
  // SSE2 supports 128bit vectors for all types.
  // AVX2 supports 256bit vectors for all types.
  // EVEX (AVX-512) supports 512bit vectors for all types.
  int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
  // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
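  // Worked example of the size computation (illustrative flag settings):
  //   UseAVX == 2          -> size = (1 << 2) * 8 = 32 bytes (256-bit)
  //   UseAVX == 3          -> size = (1 << 3) * 8 = 64 bytes (512-bit)
  //   UseAVX == 1, T_FLOAT -> size widened to 32 bytes by the check below
  //   UseAVX == 1, T_INT   -> size stays 16 bytes (AVX1 has no 256-bit integer ops)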
  if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
    size = (UseAVX > 2) ? 64 : 32;
  // Use flag to limit vector size.
  size = MIN2(size,(int)MaxVectorSize);
  // Minimum 2 values in vector (or 4 for bytes).
  switch (bt) {
  case T_DOUBLE:
  case T_LONG:
    if (size < 16) return 0;
    // fall through: the wider types must also satisfy the smaller minimums
  case T_FLOAT:
  case T_INT:
    if (size < 8) return 0;
    // fall through
  case T_BOOLEAN:
  case T_BYTE:
  case T_CHAR:
  case T_SHORT:
    if (size < 4) return 0;
    break;
  default:
    ShouldNotReachHere();
  }
  return size;
}

// Limits on vector size (number of elements) loaded into vector.
const int Matcher::max_vector_size(const BasicType bt) {
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}
const int Matcher::min_vector_size(const BasicType bt) {
  int max_size = max_vector_size(bt);
  // Min size which can be loaded into vector is 4 bytes.
  int size = (type2aelembytes(bt) == 1) ? 4 : 2;
  return MIN2(size,max_size);
}

// Vector ideal reg corresponding to specified size in bytes
const int Matcher::vector_ideal_reg(int size) {
  assert(MaxVectorSize >= size, "");
  switch(size) {
    case  4: return Op_VecS;
    case  8: return Op_VecD;
    case 16: return Op_VecX;
    case 32: return Op_VecY;
    case 64: return Op_VecZ;
  }
  ShouldNotReachHere();
  return 0;
}

// Only lowest bits of xmm reg are used for vector shift count.
const int Matcher::vector_shift_count_ideal_reg(int size) {
  return Op_VecS;
}

// x86 supports misaligned vector store/load.
const bool Matcher::misaligned_vectors_ok() {
  return !AlignVector; // can be changed by flag
}

// x86 AES instructions are compatible with SunJCE expanded
// keys, hence we do not need to pass the original key to stubs
const bool Matcher::pass_original_key_for_aes() {
  return false;
}

// Helper methods for MachSpillCopyNode::implementation().
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st) {
  // In the 64-bit VM, size calculation is very complex, so the size is
  // obtained there by emitting the instructions into a scratch buffer
  // instead (do_size applies only to the 32-bit VM).
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
  assert(ireg == Op_VecS || // 32bit vector
         (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
         (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
         "no non-adjacent vector moves" );
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    switch (ireg) {
    case Op_VecS: // copy whole register
    case Op_VecD:
    case Op_VecX:
      __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      break;
    case Op_VecY:
      __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      break;
    case Op_VecZ:
      __ evmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
      break;
    default:
      ShouldNotReachHere();
    }
    int size = __ offset() - offset;
#ifdef ASSERT
    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
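    // For a reg-reg move the expected encoding is the 2-byte (SIMD or VEX_2bytes)
    // prefix + opcode + ModRM = 4 bytes; with an EVEX prefix (UseAVX > 2) the
    // 4-byte prefix makes it 6, matching the fallback return value below.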
    assert(!do_size || size == 4, "incorrect size calculation");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    switch (ireg) {
    case Op_VecS:
    case Op_VecD:
    case Op_VecX:
      st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    case Op_VecY:
    case Op_VecZ:
      st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  return (UseAVX > 2) ? 6 : 4;
}

static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
                            int stack_offset, int reg, uint ireg, outputStream* st) {
  // In the 64-bit VM, size calculation is very complex, so the size is
  // obtained there by emitting the instructions into a scratch buffer
  // instead (do_size applies only to the 32-bit VM).
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecD:
        __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecX:
        __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecY:
        __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecZ:
        __ evmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecD:
        __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecX:
        __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecY:
        __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecZ:
        __ evmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
        break;
      default:
        ShouldNotReachHere();
      }
    }
    int size = __ offset() - offset;
#ifdef ASSERT
    int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4);
    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
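    // Size accounting example (illustrative): movdqu xmm, [rsp + 0x40] is a
    // 2-byte SIMD/VEX prefix + opcode + ModRM + SIB = 5 bytes, plus
    // offset_size = 1 for the 8-bit displacement, matching the
    // (5 + offset_size) expected by the assert below.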
    assert(!do_size || size == (5+offset_size), "incorrect size calculation");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecD:
        st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecX:
        st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecY:
      case Op_VecZ:
        st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecD:
        st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecX:
        st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecY:
      case Op_VecZ:
        st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      default:
        ShouldNotReachHere();
      }
    }
#endif
  }
  int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4);
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return 5+offset_size;
}

static inline jfloat replicate4_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 32bit.
  assert(width == 1 || width == 2, "only byte or short types here");
  int bit_width = width * 8;
  jint val = con;
  val &= (1 << bit_width) - 1;  // mask off sign bits
  while (bit_width < 32) {
    val |= (val << bit_width);
    bit_width <<= 1;
  }
  jfloat fval = *((jfloat*) &val);  // coerce to float type
  return fval;
}

static inline jdouble replicate8_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
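  // Worked example (illustrative values): replicate8_imm(0x1234, 2) masks the
  // constant to 16 bits and then OR-s in shifted copies of itself, yielding
  // the bit pattern 0x1234123412341234 (returned as a jdouble); likewise
  // replicate4_imm(0xAB, 1) above yields 0xABABABAB as a jfloat.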
1947 assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here"); 1948 int bit_width = width * 8; 1949 jlong val = con; 1950 val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits 1951 while(bit_width < 64) { 1952 val |= (val << bit_width); 1953 bit_width <<= 1; 1954 } 1955 jdouble dval = *((jdouble*) &val); // coerce to double type 1956 return dval; 1957 } 1958 1959 #ifndef PRODUCT 1960 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 1961 st->print("nop \t# %d bytes pad for loops and calls", _count); 1962 } 1963 #endif 1964 1965 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { 1966 MacroAssembler _masm(&cbuf); 1967 __ nop(_count); 1968 } 1969 1970 uint MachNopNode::size(PhaseRegAlloc*) const { 1971 return _count; 1972 } 1973 1974 #ifndef PRODUCT 1975 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 1976 st->print("# breakpoint"); 1977 } 1978 #endif 1979 1980 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { 1981 MacroAssembler _masm(&cbuf); 1982 __ int3(); 1983 } 1984 1985 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 1986 return MachNode::size(ra_); 1987 } 1988 1989 %} 1990 1991 encode %{ 1992 1993 enc_class call_epilog %{ 1994 if (VerifyStackAtCalls) { 1995 // Check that stack depth is unchanged: find majik cookie on stack 1996 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 1997 MacroAssembler _masm(&cbuf); 1998 Label L; 1999 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); 2000 __ jccb(Assembler::equal, L); 2001 // Die if stack mismatch 2002 __ int3(); 2003 __ bind(L); 2004 } 2005 %} 2006 2007 %} 2008 2009 2010 //----------OPERANDS----------------------------------------------------------- 2011 // Operand definitions must precede instruction definitions for correct parsing 2012 // in the ADLC because operands constitute user defined types which are used in 2013 // instruction definitions. 
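// For illustration (a sketch only, not a definition used by the matcher):
// an operand declared here becomes a user-defined type that the instruct
// definitions below reference by name, e.g. a hypothetical rule such as
//
//   instruct example(vecZ dst, vecZ src) %{
//     match(Set dst (SomeVectorOp src));
//     ...
//   %}
//
// would take its register constraints from the vecZ operand that follows.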
2014 2015 // This one generically applies only for evex, so only one version 2016 operand vecZ() %{ 2017 constraint(ALLOC_IN_RC(vectorz_reg)); 2018 match(VecZ); 2019 2020 format %{ %} 2021 interface(REG_INTER); 2022 %} 2023 2024 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 2025 2026 // ============================================================================ 2027 2028 instruct ShouldNotReachHere() %{ 2029 match(Halt); 2030 format %{ "int3\t# ShouldNotReachHere" %} 2031 ins_encode %{ 2032 __ int3(); 2033 %} 2034 ins_pipe(pipe_slow); 2035 %} 2036 2037 // ============================================================================ 2038 2039 instruct addF_reg(regF dst, regF src) %{ 2040 predicate((UseSSE>=1) && (UseAVX == 0)); 2041 match(Set dst (AddF dst src)); 2042 2043 format %{ "addss $dst, $src" %} 2044 ins_cost(150); 2045 ins_encode %{ 2046 __ addss($dst$$XMMRegister, $src$$XMMRegister); 2047 %} 2048 ins_pipe(pipe_slow); 2049 %} 2050 2051 instruct addF_mem(regF dst, memory src) %{ 2052 predicate((UseSSE>=1) && (UseAVX == 0)); 2053 match(Set dst (AddF dst (LoadF src))); 2054 2055 format %{ "addss $dst, $src" %} 2056 ins_cost(150); 2057 ins_encode %{ 2058 __ addss($dst$$XMMRegister, $src$$Address); 2059 %} 2060 ins_pipe(pipe_slow); 2061 %} 2062 2063 instruct addF_imm(regF dst, immF con) %{ 2064 predicate((UseSSE>=1) && (UseAVX == 0)); 2065 match(Set dst (AddF dst con)); 2066 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2067 ins_cost(150); 2068 ins_encode %{ 2069 __ addss($dst$$XMMRegister, $constantaddress($con)); 2070 %} 2071 ins_pipe(pipe_slow); 2072 %} 2073 2074 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 2075 predicate(UseAVX > 0); 2076 match(Set dst (AddF src1 src2)); 2077 2078 format %{ "vaddss $dst, $src1, $src2" %} 2079 ins_cost(150); 2080 ins_encode %{ 2081 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2082 %} 2083 ins_pipe(pipe_slow); 2084 %} 2085 2086 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 2087 predicate(UseAVX > 0); 2088 match(Set dst (AddF src1 (LoadF src2))); 2089 2090 format %{ "vaddss $dst, $src1, $src2" %} 2091 ins_cost(150); 2092 ins_encode %{ 2093 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2094 %} 2095 ins_pipe(pipe_slow); 2096 %} 2097 2098 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 2099 predicate(UseAVX > 0); 2100 match(Set dst (AddF src con)); 2101 2102 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2103 ins_cost(150); 2104 ins_encode %{ 2105 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2106 %} 2107 ins_pipe(pipe_slow); 2108 %} 2109 2110 instruct addD_reg(regD dst, regD src) %{ 2111 predicate((UseSSE>=2) && (UseAVX == 0)); 2112 match(Set dst (AddD dst src)); 2113 2114 format %{ "addsd $dst, $src" %} 2115 ins_cost(150); 2116 ins_encode %{ 2117 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 2118 %} 2119 ins_pipe(pipe_slow); 2120 %} 2121 2122 instruct addD_mem(regD dst, memory src) %{ 2123 predicate((UseSSE>=2) && (UseAVX == 0)); 2124 match(Set dst (AddD dst (LoadD src))); 2125 2126 format %{ "addsd $dst, $src" %} 2127 ins_cost(150); 2128 ins_encode %{ 2129 __ addsd($dst$$XMMRegister, $src$$Address); 2130 %} 2131 ins_pipe(pipe_slow); 2132 %} 2133 2134 instruct addD_imm(regD dst, immD con) %{ 2135 predicate((UseSSE>=2) && (UseAVX == 0)); 2136 match(Set dst (AddD dst con)); 2137 format %{ "addsd $dst, [$constantaddress]\t# load from 
constant table: double=$con" %} 2138 ins_cost(150); 2139 ins_encode %{ 2140 __ addsd($dst$$XMMRegister, $constantaddress($con)); 2141 %} 2142 ins_pipe(pipe_slow); 2143 %} 2144 2145 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 2146 predicate(UseAVX > 0); 2147 match(Set dst (AddD src1 src2)); 2148 2149 format %{ "vaddsd $dst, $src1, $src2" %} 2150 ins_cost(150); 2151 ins_encode %{ 2152 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2153 %} 2154 ins_pipe(pipe_slow); 2155 %} 2156 2157 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 2158 predicate(UseAVX > 0); 2159 match(Set dst (AddD src1 (LoadD src2))); 2160 2161 format %{ "vaddsd $dst, $src1, $src2" %} 2162 ins_cost(150); 2163 ins_encode %{ 2164 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2165 %} 2166 ins_pipe(pipe_slow); 2167 %} 2168 2169 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 2170 predicate(UseAVX > 0); 2171 match(Set dst (AddD src con)); 2172 2173 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2174 ins_cost(150); 2175 ins_encode %{ 2176 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2177 %} 2178 ins_pipe(pipe_slow); 2179 %} 2180 2181 instruct subF_reg(regF dst, regF src) %{ 2182 predicate((UseSSE>=1) && (UseAVX == 0)); 2183 match(Set dst (SubF dst src)); 2184 2185 format %{ "subss $dst, $src" %} 2186 ins_cost(150); 2187 ins_encode %{ 2188 __ subss($dst$$XMMRegister, $src$$XMMRegister); 2189 %} 2190 ins_pipe(pipe_slow); 2191 %} 2192 2193 instruct subF_mem(regF dst, memory src) %{ 2194 predicate((UseSSE>=1) && (UseAVX == 0)); 2195 match(Set dst (SubF dst (LoadF src))); 2196 2197 format %{ "subss $dst, $src" %} 2198 ins_cost(150); 2199 ins_encode %{ 2200 __ subss($dst$$XMMRegister, $src$$Address); 2201 %} 2202 ins_pipe(pipe_slow); 2203 %} 2204 2205 instruct subF_imm(regF dst, immF con) %{ 2206 predicate((UseSSE>=1) && (UseAVX == 0)); 2207 match(Set dst (SubF dst con)); 2208 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2209 ins_cost(150); 2210 ins_encode %{ 2211 __ subss($dst$$XMMRegister, $constantaddress($con)); 2212 %} 2213 ins_pipe(pipe_slow); 2214 %} 2215 2216 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 2217 predicate(UseAVX > 0); 2218 match(Set dst (SubF src1 src2)); 2219 2220 format %{ "vsubss $dst, $src1, $src2" %} 2221 ins_cost(150); 2222 ins_encode %{ 2223 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2224 %} 2225 ins_pipe(pipe_slow); 2226 %} 2227 2228 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 2229 predicate(UseAVX > 0); 2230 match(Set dst (SubF src1 (LoadF src2))); 2231 2232 format %{ "vsubss $dst, $src1, $src2" %} 2233 ins_cost(150); 2234 ins_encode %{ 2235 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2236 %} 2237 ins_pipe(pipe_slow); 2238 %} 2239 2240 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 2241 predicate(UseAVX > 0); 2242 match(Set dst (SubF src con)); 2243 2244 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2245 ins_cost(150); 2246 ins_encode %{ 2247 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2248 %} 2249 ins_pipe(pipe_slow); 2250 %} 2251 2252 instruct subD_reg(regD dst, regD src) %{ 2253 predicate((UseSSE>=2) && (UseAVX == 0)); 2254 match(Set dst (SubD dst src)); 2255 2256 format %{ "subsd $dst, $src" %} 2257 ins_cost(150); 2258 ins_encode %{ 2259 __ 
subsd($dst$$XMMRegister, $src$$XMMRegister); 2260 %} 2261 ins_pipe(pipe_slow); 2262 %} 2263 2264 instruct subD_mem(regD dst, memory src) %{ 2265 predicate((UseSSE>=2) && (UseAVX == 0)); 2266 match(Set dst (SubD dst (LoadD src))); 2267 2268 format %{ "subsd $dst, $src" %} 2269 ins_cost(150); 2270 ins_encode %{ 2271 __ subsd($dst$$XMMRegister, $src$$Address); 2272 %} 2273 ins_pipe(pipe_slow); 2274 %} 2275 2276 instruct subD_imm(regD dst, immD con) %{ 2277 predicate((UseSSE>=2) && (UseAVX == 0)); 2278 match(Set dst (SubD dst con)); 2279 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2280 ins_cost(150); 2281 ins_encode %{ 2282 __ subsd($dst$$XMMRegister, $constantaddress($con)); 2283 %} 2284 ins_pipe(pipe_slow); 2285 %} 2286 2287 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 2288 predicate(UseAVX > 0); 2289 match(Set dst (SubD src1 src2)); 2290 2291 format %{ "vsubsd $dst, $src1, $src2" %} 2292 ins_cost(150); 2293 ins_encode %{ 2294 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2295 %} 2296 ins_pipe(pipe_slow); 2297 %} 2298 2299 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 2300 predicate(UseAVX > 0); 2301 match(Set dst (SubD src1 (LoadD src2))); 2302 2303 format %{ "vsubsd $dst, $src1, $src2" %} 2304 ins_cost(150); 2305 ins_encode %{ 2306 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2307 %} 2308 ins_pipe(pipe_slow); 2309 %} 2310 2311 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 2312 predicate(UseAVX > 0); 2313 match(Set dst (SubD src con)); 2314 2315 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2316 ins_cost(150); 2317 ins_encode %{ 2318 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2319 %} 2320 ins_pipe(pipe_slow); 2321 %} 2322 2323 instruct mulF_reg(regF dst, regF src) %{ 2324 predicate((UseSSE>=1) && (UseAVX == 0)); 2325 match(Set dst (MulF dst src)); 2326 2327 format %{ "mulss $dst, $src" %} 2328 ins_cost(150); 2329 ins_encode %{ 2330 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 2331 %} 2332 ins_pipe(pipe_slow); 2333 %} 2334 2335 instruct mulF_mem(regF dst, memory src) %{ 2336 predicate((UseSSE>=1) && (UseAVX == 0)); 2337 match(Set dst (MulF dst (LoadF src))); 2338 2339 format %{ "mulss $dst, $src" %} 2340 ins_cost(150); 2341 ins_encode %{ 2342 __ mulss($dst$$XMMRegister, $src$$Address); 2343 %} 2344 ins_pipe(pipe_slow); 2345 %} 2346 2347 instruct mulF_imm(regF dst, immF con) %{ 2348 predicate((UseSSE>=1) && (UseAVX == 0)); 2349 match(Set dst (MulF dst con)); 2350 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2351 ins_cost(150); 2352 ins_encode %{ 2353 __ mulss($dst$$XMMRegister, $constantaddress($con)); 2354 %} 2355 ins_pipe(pipe_slow); 2356 %} 2357 2358 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 2359 predicate(UseAVX > 0); 2360 match(Set dst (MulF src1 src2)); 2361 2362 format %{ "vmulss $dst, $src1, $src2" %} 2363 ins_cost(150); 2364 ins_encode %{ 2365 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2366 %} 2367 ins_pipe(pipe_slow); 2368 %} 2369 2370 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 2371 predicate(UseAVX > 0); 2372 match(Set dst (MulF src1 (LoadF src2))); 2373 2374 format %{ "vmulss $dst, $src1, $src2" %} 2375 ins_cost(150); 2376 ins_encode %{ 2377 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2378 %} 2379 ins_pipe(pipe_slow); 2380 %} 2381 2382 instruct mulF_reg_imm(regF 
dst, regF src, immF con) %{ 2383 predicate(UseAVX > 0); 2384 match(Set dst (MulF src con)); 2385 2386 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2387 ins_cost(150); 2388 ins_encode %{ 2389 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2390 %} 2391 ins_pipe(pipe_slow); 2392 %} 2393 2394 instruct mulD_reg(regD dst, regD src) %{ 2395 predicate((UseSSE>=2) && (UseAVX == 0)); 2396 match(Set dst (MulD dst src)); 2397 2398 format %{ "mulsd $dst, $src" %} 2399 ins_cost(150); 2400 ins_encode %{ 2401 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 2402 %} 2403 ins_pipe(pipe_slow); 2404 %} 2405 2406 instruct mulD_mem(regD dst, memory src) %{ 2407 predicate((UseSSE>=2) && (UseAVX == 0)); 2408 match(Set dst (MulD dst (LoadD src))); 2409 2410 format %{ "mulsd $dst, $src" %} 2411 ins_cost(150); 2412 ins_encode %{ 2413 __ mulsd($dst$$XMMRegister, $src$$Address); 2414 %} 2415 ins_pipe(pipe_slow); 2416 %} 2417 2418 instruct mulD_imm(regD dst, immD con) %{ 2419 predicate((UseSSE>=2) && (UseAVX == 0)); 2420 match(Set dst (MulD dst con)); 2421 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2422 ins_cost(150); 2423 ins_encode %{ 2424 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 2425 %} 2426 ins_pipe(pipe_slow); 2427 %} 2428 2429 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 2430 predicate(UseAVX > 0); 2431 match(Set dst (MulD src1 src2)); 2432 2433 format %{ "vmulsd $dst, $src1, $src2" %} 2434 ins_cost(150); 2435 ins_encode %{ 2436 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2437 %} 2438 ins_pipe(pipe_slow); 2439 %} 2440 2441 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 2442 predicate(UseAVX > 0); 2443 match(Set dst (MulD src1 (LoadD src2))); 2444 2445 format %{ "vmulsd $dst, $src1, $src2" %} 2446 ins_cost(150); 2447 ins_encode %{ 2448 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2449 %} 2450 ins_pipe(pipe_slow); 2451 %} 2452 2453 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 2454 predicate(UseAVX > 0); 2455 match(Set dst (MulD src con)); 2456 2457 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2458 ins_cost(150); 2459 ins_encode %{ 2460 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2461 %} 2462 ins_pipe(pipe_slow); 2463 %} 2464 2465 instruct divF_reg(regF dst, regF src) %{ 2466 predicate((UseSSE>=1) && (UseAVX == 0)); 2467 match(Set dst (DivF dst src)); 2468 2469 format %{ "divss $dst, $src" %} 2470 ins_cost(150); 2471 ins_encode %{ 2472 __ divss($dst$$XMMRegister, $src$$XMMRegister); 2473 %} 2474 ins_pipe(pipe_slow); 2475 %} 2476 2477 instruct divF_mem(regF dst, memory src) %{ 2478 predicate((UseSSE>=1) && (UseAVX == 0)); 2479 match(Set dst (DivF dst (LoadF src))); 2480 2481 format %{ "divss $dst, $src" %} 2482 ins_cost(150); 2483 ins_encode %{ 2484 __ divss($dst$$XMMRegister, $src$$Address); 2485 %} 2486 ins_pipe(pipe_slow); 2487 %} 2488 2489 instruct divF_imm(regF dst, immF con) %{ 2490 predicate((UseSSE>=1) && (UseAVX == 0)); 2491 match(Set dst (DivF dst con)); 2492 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2493 ins_cost(150); 2494 ins_encode %{ 2495 __ divss($dst$$XMMRegister, $constantaddress($con)); 2496 %} 2497 ins_pipe(pipe_slow); 2498 %} 2499 2500 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 2501 predicate(UseAVX > 0); 2502 match(Set dst (DivF src1 src2)); 2503 2504 
format %{ "vdivss $dst, $src1, $src2" %} 2505 ins_cost(150); 2506 ins_encode %{ 2507 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2508 %} 2509 ins_pipe(pipe_slow); 2510 %} 2511 2512 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 2513 predicate(UseAVX > 0); 2514 match(Set dst (DivF src1 (LoadF src2))); 2515 2516 format %{ "vdivss $dst, $src1, $src2" %} 2517 ins_cost(150); 2518 ins_encode %{ 2519 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2520 %} 2521 ins_pipe(pipe_slow); 2522 %} 2523 2524 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 2525 predicate(UseAVX > 0); 2526 match(Set dst (DivF src con)); 2527 2528 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2529 ins_cost(150); 2530 ins_encode %{ 2531 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2532 %} 2533 ins_pipe(pipe_slow); 2534 %} 2535 2536 instruct divD_reg(regD dst, regD src) %{ 2537 predicate((UseSSE>=2) && (UseAVX == 0)); 2538 match(Set dst (DivD dst src)); 2539 2540 format %{ "divsd $dst, $src" %} 2541 ins_cost(150); 2542 ins_encode %{ 2543 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 2544 %} 2545 ins_pipe(pipe_slow); 2546 %} 2547 2548 instruct divD_mem(regD dst, memory src) %{ 2549 predicate((UseSSE>=2) && (UseAVX == 0)); 2550 match(Set dst (DivD dst (LoadD src))); 2551 2552 format %{ "divsd $dst, $src" %} 2553 ins_cost(150); 2554 ins_encode %{ 2555 __ divsd($dst$$XMMRegister, $src$$Address); 2556 %} 2557 ins_pipe(pipe_slow); 2558 %} 2559 2560 instruct divD_imm(regD dst, immD con) %{ 2561 predicate((UseSSE>=2) && (UseAVX == 0)); 2562 match(Set dst (DivD dst con)); 2563 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2564 ins_cost(150); 2565 ins_encode %{ 2566 __ divsd($dst$$XMMRegister, $constantaddress($con)); 2567 %} 2568 ins_pipe(pipe_slow); 2569 %} 2570 2571 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 2572 predicate(UseAVX > 0); 2573 match(Set dst (DivD src1 src2)); 2574 2575 format %{ "vdivsd $dst, $src1, $src2" %} 2576 ins_cost(150); 2577 ins_encode %{ 2578 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2579 %} 2580 ins_pipe(pipe_slow); 2581 %} 2582 2583 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 2584 predicate(UseAVX > 0); 2585 match(Set dst (DivD src1 (LoadD src2))); 2586 2587 format %{ "vdivsd $dst, $src1, $src2" %} 2588 ins_cost(150); 2589 ins_encode %{ 2590 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2591 %} 2592 ins_pipe(pipe_slow); 2593 %} 2594 2595 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 2596 predicate(UseAVX > 0); 2597 match(Set dst (DivD src con)); 2598 2599 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2600 ins_cost(150); 2601 ins_encode %{ 2602 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2603 %} 2604 ins_pipe(pipe_slow); 2605 %} 2606 2607 instruct absF_reg(regF dst) %{ 2608 predicate((UseSSE>=1) && (UseAVX == 0)); 2609 match(Set dst (AbsF dst)); 2610 ins_cost(150); 2611 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} 2612 ins_encode %{ 2613 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 2614 %} 2615 ins_pipe(pipe_slow); 2616 %} 2617 2618 instruct absF_reg_reg(regF dst, regF src) %{ 2619 predicate(UseAVX > 0); 2620 match(Set dst (AbsF src)); 2621 ins_cost(150); 2622 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign 
masking" %} 2623 ins_encode %{ 2624 int vector_len = 0; 2625 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 2626 ExternalAddress(float_signmask()), vector_len); 2627 %} 2628 ins_pipe(pipe_slow); 2629 %} 2630 2631 instruct absD_reg(regD dst) %{ 2632 predicate((UseSSE>=2) && (UseAVX == 0)); 2633 match(Set dst (AbsD dst)); 2634 ins_cost(150); 2635 format %{ "andpd $dst, [0x7fffffffffffffff]\t" 2636 "# abs double by sign masking" %} 2637 ins_encode %{ 2638 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); 2639 %} 2640 ins_pipe(pipe_slow); 2641 %} 2642 2643 instruct absD_reg_reg(regD dst, regD src) %{ 2644 predicate(UseAVX > 0); 2645 match(Set dst (AbsD src)); 2646 ins_cost(150); 2647 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 2648 "# abs double by sign masking" %} 2649 ins_encode %{ 2650 int vector_len = 0; 2651 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 2652 ExternalAddress(double_signmask()), vector_len); 2653 %} 2654 ins_pipe(pipe_slow); 2655 %} 2656 2657 instruct negF_reg(regF dst) %{ 2658 predicate((UseSSE>=1) && (UseAVX == 0)); 2659 match(Set dst (NegF dst)); 2660 ins_cost(150); 2661 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 2662 ins_encode %{ 2663 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 2664 %} 2665 ins_pipe(pipe_slow); 2666 %} 2667 2668 instruct negF_reg_reg(regF dst, regF src) %{ 2669 predicate(UseAVX > 0); 2670 match(Set dst (NegF src)); 2671 ins_cost(150); 2672 format %{ "vxorps $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 2673 ins_encode %{ 2674 int vector_len = 0; 2675 __ vxorps($dst$$XMMRegister, $src$$XMMRegister, 2676 ExternalAddress(float_signflip()), vector_len); 2677 %} 2678 ins_pipe(pipe_slow); 2679 %} 2680 2681 instruct negD_reg(regD dst) %{ 2682 predicate((UseSSE>=2) && (UseAVX == 0)); 2683 match(Set dst (NegD dst)); 2684 ins_cost(150); 2685 format %{ "xorpd $dst, [0x8000000000000000]\t" 2686 "# neg double by sign flipping" %} 2687 ins_encode %{ 2688 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 2689 %} 2690 ins_pipe(pipe_slow); 2691 %} 2692 2693 instruct negD_reg_reg(regD dst, regD src) %{ 2694 predicate(UseAVX > 0); 2695 match(Set dst (NegD src)); 2696 ins_cost(150); 2697 format %{ "vxorpd $dst, $src, [0x8000000000000000]\t" 2698 "# neg double by sign flipping" %} 2699 ins_encode %{ 2700 int vector_len = 0; 2701 __ vxorpd($dst$$XMMRegister, $src$$XMMRegister, 2702 ExternalAddress(double_signflip()), vector_len); 2703 %} 2704 ins_pipe(pipe_slow); 2705 %} 2706 2707 instruct sqrtF_reg(regF dst, regF src) %{ 2708 predicate(UseSSE>=1); 2709 match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); 2710 2711 format %{ "sqrtss $dst, $src" %} 2712 ins_cost(150); 2713 ins_encode %{ 2714 __ sqrtss($dst$$XMMRegister, $src$$XMMRegister); 2715 %} 2716 ins_pipe(pipe_slow); 2717 %} 2718 2719 instruct sqrtF_mem(regF dst, memory src) %{ 2720 predicate(UseSSE>=1); 2721 match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src))))); 2722 2723 format %{ "sqrtss $dst, $src" %} 2724 ins_cost(150); 2725 ins_encode %{ 2726 __ sqrtss($dst$$XMMRegister, $src$$Address); 2727 %} 2728 ins_pipe(pipe_slow); 2729 %} 2730 2731 instruct sqrtF_imm(regF dst, immF con) %{ 2732 predicate(UseSSE>=1); 2733 match(Set dst (ConvD2F (SqrtD (ConvF2D con)))); 2734 format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2735 ins_cost(150); 2736 ins_encode %{ 2737 __ sqrtss($dst$$XMMRegister, $constantaddress($con)); 2738 %} 2739 ins_pipe(pipe_slow); 2740 %} 2741 2742 instruct 
instruct sqrtD_reg(regD dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD src));

  format %{ "sqrtsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_mem(regD dst, memory src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD (LoadD src)));

  format %{ "sqrtsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_imm(regD dst, immD con) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD con));
  format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// ====================VECTOR INSTRUCTIONS=====================================

// Load vectors (4 bytes long)
instruct loadV4(vecS dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movd $dst,$mem\t! load vector (4 bytes)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (8 bytes long)
instruct loadV8(vecD dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movq $dst,$mem\t! load vector (8 bytes)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (16 bytes long)
instruct loadV16(vecX dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %}
  ins_encode %{
    __ movdqu($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (32 bytes long)
instruct loadV32(vecY dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 32);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (64 bytes long)
instruct loadV64(vecZ dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 64);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdqu $dst k0,$mem\t! load vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdqu($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Store vectors
instruct storeV4(memory mem, vecS src) %{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movd $mem,$src\t! store vector (4 bytes)" %}
  ins_encode %{
    __ movdl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV8(memory mem, vecD src) %{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movq $mem,$src\t! store vector (8 bytes)" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV16(memory mem, vecX src) %{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %}
  ins_encode %{
    __ movdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV32(memory mem, vecY src) %{
  predicate(n->as_StoreVector()->memory_size() == 32);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV64(memory mem, vecZ src) %{
  predicate(n->as_StoreVector()->memory_size() == 64);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdqu $mem k0,$src\t! store vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdqu($mem$$Address, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
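
// Note: vector loads and stores are selected purely by payload size via
// n->as_LoadVector()->memory_size() / n->as_StoreVector()->memory_size();
// the operand classes encode those sizes: vecS = 4 bytes, vecD = 8,
// vecX = 16, vecY = 32, vecZ = 64. A 16-byte LoadVector therefore always
// matches loadV16/movdqu, regardless of the vector's element type.
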
replicate16B" %} 2945 ins_encode %{ 2946 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 2947 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2948 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2949 %} 2950 ins_pipe( pipe_slow ); 2951 %} 2952 2953 instruct Repl32B(vecY dst, rRegI src) %{ 2954 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 2955 match(Set dst (ReplicateB src)); 2956 format %{ "movd $dst,$src\n\t" 2957 "punpcklbw $dst,$dst\n\t" 2958 "pshuflw $dst,$dst,0x00\n\t" 2959 "punpcklqdq $dst,$dst\n\t" 2960 "vinserti128h $dst,$dst,$dst\t! replicate32B" %} 2961 ins_encode %{ 2962 __ movdl($dst$$XMMRegister, $src$$Register); 2963 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 2964 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2965 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2966 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2967 %} 2968 ins_pipe( pipe_slow ); 2969 %} 2970 2971 instruct Repl32B_mem(vecY dst, memory mem) %{ 2972 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 2973 match(Set dst (ReplicateB (LoadB mem))); 2974 format %{ "punpcklbw $dst,$mem\n\t" 2975 "pshuflw $dst,$dst,0x00\n\t" 2976 "punpcklqdq $dst,$dst\n\t" 2977 "vinserti128h $dst,$dst,$dst\t! replicate32B" %} 2978 ins_encode %{ 2979 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 2980 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2981 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2982 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2983 %} 2984 ins_pipe( pipe_slow ); 2985 %} 2986 2987 instruct Repl16B_imm(vecX dst, immI con) %{ 2988 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 2989 match(Set dst (ReplicateB con)); 2990 format %{ "movq $dst,[$constantaddress]\n\t" 2991 "punpcklqdq $dst,$dst\t! replicate16B($con)" %} 2992 ins_encode %{ 2993 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 2994 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2995 %} 2996 ins_pipe( pipe_slow ); 2997 %} 2998 2999 instruct Repl32B_imm(vecY dst, immI con) %{ 3000 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3001 match(Set dst (ReplicateB con)); 3002 format %{ "movq $dst,[$constantaddress]\n\t" 3003 "punpcklqdq $dst,$dst\n\t" 3004 "vinserti128h $dst,$dst,$dst\t! lreplicate32B($con)" %} 3005 ins_encode %{ 3006 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3007 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3008 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3009 %} 3010 ins_pipe( pipe_slow ); 3011 %} 3012 3013 instruct Repl4S(vecD dst, rRegI src) %{ 3014 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vlbw()); 3015 match(Set dst (ReplicateS src)); 3016 format %{ "movd $dst,$src\n\t" 3017 "pshuflw $dst,$dst,0x00\t! replicate4S" %} 3018 ins_encode %{ 3019 __ movdl($dst$$XMMRegister, $src$$Register); 3020 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3021 %} 3022 ins_pipe( pipe_slow ); 3023 %} 3024 3025 instruct Repl4S_mem(vecD dst, memory mem) %{ 3026 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3027 match(Set dst (ReplicateS (LoadS mem))); 3028 format %{ "pshuflw $dst,$mem,0x00\t! 
replicate4S" %} 3029 ins_encode %{ 3030 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3031 %} 3032 ins_pipe( pipe_slow ); 3033 %} 3034 3035 instruct Repl8S(vecX dst, rRegI src) %{ 3036 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw()); 3037 match(Set dst (ReplicateS src)); 3038 format %{ "movd $dst,$src\n\t" 3039 "pshuflw $dst,$dst,0x00\n\t" 3040 "punpcklqdq $dst,$dst\t! replicate8S" %} 3041 ins_encode %{ 3042 __ movdl($dst$$XMMRegister, $src$$Register); 3043 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3044 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3045 %} 3046 ins_pipe( pipe_slow ); 3047 %} 3048 3049 instruct Repl8S_mem(vecX dst, memory mem) %{ 3050 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3051 match(Set dst (ReplicateS (LoadS mem))); 3052 format %{ "pshuflw $dst,$mem,0x00\n\t" 3053 "punpcklqdq $dst,$dst\t! replicate8S" %} 3054 ins_encode %{ 3055 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3056 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3057 %} 3058 ins_pipe( pipe_slow ); 3059 %} 3060 3061 instruct Repl8S_imm(vecX dst, immI con) %{ 3062 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw()); 3063 match(Set dst (ReplicateS con)); 3064 format %{ "movq $dst,[$constantaddress]\n\t" 3065 "punpcklqdq $dst,$dst\t! replicate8S($con)" %} 3066 ins_encode %{ 3067 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3068 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3069 %} 3070 ins_pipe( pipe_slow ); 3071 %} 3072 3073 instruct Repl16S(vecY dst, rRegI src) %{ 3074 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3075 match(Set dst (ReplicateS src)); 3076 format %{ "movd $dst,$src\n\t" 3077 "pshuflw $dst,$dst,0x00\n\t" 3078 "punpcklqdq $dst,$dst\n\t" 3079 "vinserti128h $dst,$dst,$dst\t! replicate16S" %} 3080 ins_encode %{ 3081 __ movdl($dst$$XMMRegister, $src$$Register); 3082 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3083 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3084 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3085 %} 3086 ins_pipe( pipe_slow ); 3087 %} 3088 3089 instruct Repl16S_mem(vecY dst, memory mem) %{ 3090 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3091 match(Set dst (ReplicateS (LoadS mem))); 3092 format %{ "pshuflw $dst,$mem,0x00\n\t" 3093 "punpcklqdq $dst,$dst\n\t" 3094 "vinserti128h $dst,$dst,$dst\t! replicate16S" %} 3095 ins_encode %{ 3096 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3097 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3098 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3099 %} 3100 ins_pipe( pipe_slow ); 3101 %} 3102 3103 instruct Repl16S_imm(vecY dst, immI con) %{ 3104 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3105 match(Set dst (ReplicateS con)); 3106 format %{ "movq $dst,[$constantaddress]\n\t" 3107 "punpcklqdq $dst,$dst\n\t" 3108 "vinserti128h $dst,$dst,$dst\t! 

instruct Repl16S_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate4I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "pshufd $dst,$mem,0x00\t! replicate4I" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "pshufd $dst,$mem,0x00\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t"
            "punpcklqdq $dst,$dst" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
replicate2L" %} 3197 ins_encode %{ 3198 __ movq($dst$$XMMRegister, $mem$$Address); 3199 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3200 %} 3201 ins_pipe( pipe_slow ); 3202 %} 3203 3204 // Replicate long (8 byte) scalar to be vector 3205 #ifdef _LP64 3206 instruct Repl4L(vecY dst, rRegL src) %{ 3207 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3208 match(Set dst (ReplicateL src)); 3209 format %{ "movdq $dst,$src\n\t" 3210 "punpcklqdq $dst,$dst\n\t" 3211 "vinserti128h $dst,$dst,$dst\t! replicate4L" %} 3212 ins_encode %{ 3213 __ movdq($dst$$XMMRegister, $src$$Register); 3214 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3215 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3216 %} 3217 ins_pipe( pipe_slow ); 3218 %} 3219 #else // _LP64 3220 instruct Repl4L(vecY dst, eRegL src, regD tmp) %{ 3221 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3222 match(Set dst (ReplicateL src)); 3223 effect(TEMP dst, USE src, TEMP tmp); 3224 format %{ "movdl $dst,$src.lo\n\t" 3225 "movdl $tmp,$src.hi\n\t" 3226 "punpckldq $dst,$tmp\n\t" 3227 "punpcklqdq $dst,$dst\n\t" 3228 "vinserti128h $dst,$dst,$dst\t! replicate4L" %} 3229 ins_encode %{ 3230 __ movdl($dst$$XMMRegister, $src$$Register); 3231 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3232 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3233 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3234 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3235 %} 3236 ins_pipe( pipe_slow ); 3237 %} 3238 #endif // _LP64 3239 3240 instruct Repl4L_imm(vecY dst, immL con) %{ 3241 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3242 match(Set dst (ReplicateL con)); 3243 format %{ "movq $dst,[$constantaddress]\n\t" 3244 "punpcklqdq $dst,$dst\n\t" 3245 "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %} 3246 ins_encode %{ 3247 __ movq($dst$$XMMRegister, $constantaddress($con)); 3248 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3249 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3250 %} 3251 ins_pipe( pipe_slow ); 3252 %} 3253 3254 instruct Repl4L_mem(vecY dst, memory mem) %{ 3255 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3256 match(Set dst (ReplicateL (LoadL mem))); 3257 format %{ "movq $dst,$mem\n\t" 3258 "punpcklqdq $dst,$dst\n\t" 3259 "vinserti128h $dst,$dst,$dst\t! replicate4L" %} 3260 ins_encode %{ 3261 __ movq($dst$$XMMRegister, $mem$$Address); 3262 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3263 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3264 %} 3265 ins_pipe( pipe_slow ); 3266 %} 3267 3268 instruct Repl2F_mem(vecD dst, memory mem) %{ 3269 predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3270 match(Set dst (ReplicateF (LoadF mem))); 3271 format %{ "pshufd $dst,$mem,0x00\t! replicate2F" %} 3272 ins_encode %{ 3273 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3274 %} 3275 ins_pipe( pipe_slow ); 3276 %} 3277 3278 instruct Repl4F_mem(vecX dst, memory mem) %{ 3279 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3280 match(Set dst (ReplicateF (LoadF mem))); 3281 format %{ "pshufd $dst,$mem,0x00\t! 
replicate4F" %} 3282 ins_encode %{ 3283 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3284 %} 3285 ins_pipe( pipe_slow ); 3286 %} 3287 3288 instruct Repl8F(vecY dst, regF src) %{ 3289 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3290 match(Set dst (ReplicateF src)); 3291 format %{ "pshufd $dst,$src,0x00\n\t" 3292 "vinsertf128h $dst,$dst,$dst\t! replicate8F" %} 3293 ins_encode %{ 3294 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3295 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3296 %} 3297 ins_pipe( pipe_slow ); 3298 %} 3299 3300 instruct Repl8F_mem(vecY dst, memory mem) %{ 3301 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3302 match(Set dst (ReplicateF (LoadF mem))); 3303 format %{ "pshufd $dst,$mem,0x00\n\t" 3304 "vinsertf128h $dst,$dst,$dst\t! replicate8F" %} 3305 ins_encode %{ 3306 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3307 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3308 %} 3309 ins_pipe( pipe_slow ); 3310 %} 3311 3312 instruct Repl2D_mem(vecX dst, memory mem) %{ 3313 predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3314 match(Set dst (ReplicateD (LoadD mem))); 3315 format %{ "pshufd $dst,$mem,0x44\t! replicate2D" %} 3316 ins_encode %{ 3317 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3318 %} 3319 ins_pipe( pipe_slow ); 3320 %} 3321 3322 instruct Repl4D(vecY dst, regD src) %{ 3323 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3324 match(Set dst (ReplicateD src)); 3325 format %{ "pshufd $dst,$src,0x44\n\t" 3326 "vinsertf128h $dst,$dst,$dst\t! replicate4D" %} 3327 ins_encode %{ 3328 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3329 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3330 %} 3331 ins_pipe( pipe_slow ); 3332 %} 3333 3334 instruct Repl4D_mem(vecY dst, memory mem) %{ 3335 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3336 match(Set dst (ReplicateD (LoadD mem))); 3337 format %{ "pshufd $dst,$mem,0x44\n\t" 3338 "vinsertf128h $dst,$dst,$dst\t! replicate4D" %} 3339 ins_encode %{ 3340 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3341 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3342 %} 3343 ins_pipe( pipe_slow ); 3344 %} 3345 3346 // ====================GENERIC REPLICATE========================================== 3347 3348 // Replicate byte scalar to be vector 3349 instruct Repl4B(vecS dst, rRegI src) %{ 3350 predicate(n->as_Vector()->length() == 4); 3351 match(Set dst (ReplicateB src)); 3352 format %{ "movd $dst,$src\n\t" 3353 "punpcklbw $dst,$dst\n\t" 3354 "pshuflw $dst,$dst,0x00\t! replicate4B" %} 3355 ins_encode %{ 3356 __ movdl($dst$$XMMRegister, $src$$Register); 3357 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3358 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3359 %} 3360 ins_pipe( pipe_slow ); 3361 %} 3362 3363 instruct Repl8B(vecD dst, rRegI src) %{ 3364 predicate(n->as_Vector()->length() == 8); 3365 match(Set dst (ReplicateB src)); 3366 format %{ "movd $dst,$src\n\t" 3367 "punpcklbw $dst,$dst\n\t" 3368 "pshuflw $dst,$dst,0x00\t! 
replicate8B" %} 3369 ins_encode %{ 3370 __ movdl($dst$$XMMRegister, $src$$Register); 3371 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3372 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3373 %} 3374 ins_pipe( pipe_slow ); 3375 %} 3376 3377 // Replicate byte scalar immediate to be vector by loading from const table. 3378 instruct Repl4B_imm(vecS dst, immI con) %{ 3379 predicate(n->as_Vector()->length() == 4); 3380 match(Set dst (ReplicateB con)); 3381 format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %} 3382 ins_encode %{ 3383 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1))); 3384 %} 3385 ins_pipe( pipe_slow ); 3386 %} 3387 3388 instruct Repl8B_imm(vecD dst, immI con) %{ 3389 predicate(n->as_Vector()->length() == 8); 3390 match(Set dst (ReplicateB con)); 3391 format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %} 3392 ins_encode %{ 3393 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3394 %} 3395 ins_pipe( pipe_slow ); 3396 %} 3397 3398 // Replicate byte scalar zero to be vector 3399 instruct Repl4B_zero(vecS dst, immI0 zero) %{ 3400 predicate(n->as_Vector()->length() == 4); 3401 match(Set dst (ReplicateB zero)); 3402 format %{ "pxor $dst,$dst\t! replicate4B zero" %} 3403 ins_encode %{ 3404 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3405 %} 3406 ins_pipe( fpu_reg_reg ); 3407 %} 3408 3409 instruct Repl8B_zero(vecD dst, immI0 zero) %{ 3410 predicate(n->as_Vector()->length() == 8); 3411 match(Set dst (ReplicateB zero)); 3412 format %{ "pxor $dst,$dst\t! replicate8B zero" %} 3413 ins_encode %{ 3414 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3415 %} 3416 ins_pipe( fpu_reg_reg ); 3417 %} 3418 3419 instruct Repl16B_zero(vecX dst, immI0 zero) %{ 3420 predicate(n->as_Vector()->length() == 16); 3421 match(Set dst (ReplicateB zero)); 3422 format %{ "pxor $dst,$dst\t! replicate16B zero" %} 3423 ins_encode %{ 3424 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3425 %} 3426 ins_pipe( fpu_reg_reg ); 3427 %} 3428 3429 instruct Repl32B_zero(vecY dst, immI0 zero) %{ 3430 predicate(n->as_Vector()->length() == 32); 3431 match(Set dst (ReplicateB zero)); 3432 format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %} 3433 ins_encode %{ 3434 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 3435 int vector_len = 1; 3436 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3437 %} 3438 ins_pipe( fpu_reg_reg ); 3439 %} 3440 3441 // Replicate char/short (2 byte) scalar to be vector 3442 instruct Repl2S(vecS dst, rRegI src) %{ 3443 predicate(n->as_Vector()->length() == 2); 3444 match(Set dst (ReplicateS src)); 3445 format %{ "movd $dst,$src\n\t" 3446 "pshuflw $dst,$dst,0x00\t! replicate2S" %} 3447 ins_encode %{ 3448 __ movdl($dst$$XMMRegister, $src$$Register); 3449 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3450 %} 3451 ins_pipe( fpu_reg_reg ); 3452 %} 3453 3454 // Replicate char/short (2 byte) scalar immediate to be vector by loading from const table. 3455 instruct Repl2S_imm(vecS dst, immI con) %{ 3456 predicate(n->as_Vector()->length() == 2); 3457 match(Set dst (ReplicateS con)); 3458 format %{ "movdl $dst,[$constantaddress]\t! 
// Replicate char/short (2 byte) scalar to be vector
instruct Repl2S(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate2S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
instruct Repl2S_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS con));
  format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate char/short (2 byte) scalar zero to be vector
instruct Repl2S_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate2S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate4S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8S_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate8S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16S_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar to be vector
instruct Repl2I(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Integer could be loaded into xmm register directly from memory.
instruct Repl2I_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
instruct Repl2I_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar zero to be vector
instruct Repl2I_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate2I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate4I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8I_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate long (8 byte) scalar to be vector
#ifdef _LP64
instruct Repl2L(vecX dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64
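
// Note: on 32-bit VMs a long lives in a pair of GPRs, so the #else branch
// above first assembles the two 32-bit halves inside the XMM register:
// movdl loads the low word, a second movdl plus punpckldq appends the high
// word (HIGH_FROM_LOW names the pair's upper register), and punpcklqdq then
// doubles the qword. Sketch of the data movement (illustrative only):
//
//   // uint64_t v = ((uint64_t)hi << 32) | lo;  // after punpckldq
//   // xmm = { v, v };                          // after punpcklqdq
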
// Replicate long (8 byte) scalar immediate to be vector by loading from const table.
instruct Repl2L_imm(vecX dst, immL con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate long (8 byte) scalar zero to be vector
instruct Repl2L_zero(vecX dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL zero));
  format %{ "pxor $dst,$dst\t! replicate2L zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4L_zero(vecY dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate float (4 byte) scalar to be vector
instruct Repl2F(vecD dst, regF src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\t! replicate2F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F(vecX dst, regF src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\t! replicate4F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate float (4 byte) scalar zero to be vector
instruct Repl2F_zero(vecD dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF zero));
  format %{ "xorps $dst,$dst\t! replicate2F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F_zero(vecX dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF zero));
  format %{ "xorps $dst,$dst\t! replicate4F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8F_zero(vecY dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateF zero));
  format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %}
  ins_encode %{
    int vector_len = 1;
    __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate double (8 bytes) scalar to be vector
instruct Repl2D(vecX dst, regD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\t! replicate2D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate double (8 byte) scalar zero to be vector
instruct Repl2D_zero(vecX dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD zero));
  format %{ "xorpd $dst,$dst\t! replicate2D zero" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4D_zero(vecY dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateD zero));
  format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %}
  ins_encode %{
    int vector_len = 1;
    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// ====================EVEX REPLICATE=============================================
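
// Note: throughout the EVEX rules below, vector_len selects the operation
// width handed to the assembler: 0 = 128-bit, 1 = 256-bit, 2 = 512-bit.
// Hence Repl16B_evex (16 bytes) passes 0, Repl32B_evex passes 1, and
// Repl64B_evex passes 2.
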
replicate4B" %} 3748 ins_encode %{ 3749 int vector_len = 0; 3750 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 3751 %} 3752 ins_pipe( pipe_slow ); 3753 %} 3754 3755 instruct Repl8B_mem_evex(vecD dst, memory mem) %{ 3756 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 3757 match(Set dst (ReplicateB (LoadB mem))); 3758 format %{ "vpbroadcastb $dst,$mem\t! replicate8B" %} 3759 ins_encode %{ 3760 int vector_len = 0; 3761 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 3762 %} 3763 ins_pipe( pipe_slow ); 3764 %} 3765 3766 instruct Repl16B_evex(vecX dst, rRegI src) %{ 3767 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 3768 match(Set dst (ReplicateB src)); 3769 format %{ "vpbroadcastb $dst,$src\t! replicate16B" %} 3770 ins_encode %{ 3771 int vector_len = 0; 3772 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 3773 %} 3774 ins_pipe( pipe_slow ); 3775 %} 3776 3777 instruct Repl16B_mem_evex(vecX dst, memory mem) %{ 3778 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 3779 match(Set dst (ReplicateB (LoadB mem))); 3780 format %{ "vpbroadcastb $dst,$mem\t! replicate16B" %} 3781 ins_encode %{ 3782 int vector_len = 0; 3783 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 3784 %} 3785 ins_pipe( pipe_slow ); 3786 %} 3787 3788 instruct Repl32B_evex(vecY dst, rRegI src) %{ 3789 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); 3790 match(Set dst (ReplicateB src)); 3791 format %{ "vpbroadcastb $dst,$src\t! replicate32B" %} 3792 ins_encode %{ 3793 int vector_len = 1; 3794 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 3795 %} 3796 ins_pipe( pipe_slow ); 3797 %} 3798 3799 instruct Repl32B_mem_evex(vecY dst, memory mem) %{ 3800 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); 3801 match(Set dst (ReplicateB (LoadB mem))); 3802 format %{ "vpbroadcastb $dst,$mem\t! replicate32B" %} 3803 ins_encode %{ 3804 int vector_len = 1; 3805 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 3806 %} 3807 ins_pipe( pipe_slow ); 3808 %} 3809 3810 instruct Repl64B_evex(vecZ dst, rRegI src) %{ 3811 predicate(n->as_Vector()->length() == 64 && UseAVX > 2); 3812 match(Set dst (ReplicateB src)); 3813 format %{ "vpbroadcastb $dst,$src\t! upper replicate64B" %} 3814 ins_encode %{ 3815 int vector_len = 2; 3816 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 3817 %} 3818 ins_pipe( pipe_slow ); 3819 %} 3820 3821 instruct Repl64B_mem_evex(vecZ dst, memory mem) %{ 3822 predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512vlbw()); 3823 match(Set dst (ReplicateB (LoadB mem))); 3824 format %{ "vpbroadcastb $dst,$mem\t! replicate64B" %} 3825 ins_encode %{ 3826 int vector_len = 2; 3827 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 3828 %} 3829 ins_pipe( pipe_slow ); 3830 %} 3831 3832 instruct Repl16B_imm_evex(vecX dst, immI con) %{ 3833 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 3834 match(Set dst (ReplicateB con)); 3835 format %{ "movq $dst,[$constantaddress]\n\t" 3836 "vpbroadcastb $dst,$dst\t! 
replicate16B" %} 3837 ins_encode %{ 3838 int vector_len = 0; 3839 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3840 __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3841 %} 3842 ins_pipe( pipe_slow ); 3843 %} 3844 3845 instruct Repl32B_imm_evex(vecY dst, immI con) %{ 3846 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); 3847 match(Set dst (ReplicateB con)); 3848 format %{ "movq $dst,[$constantaddress]\n\t" 3849 "vpbroadcastb $dst,$dst\t! replicate32B" %} 3850 ins_encode %{ 3851 int vector_len = 1; 3852 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3853 __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3854 %} 3855 ins_pipe( pipe_slow ); 3856 %} 3857 3858 instruct Repl64B_imm_evex(vecZ dst, immI con) %{ 3859 predicate(n->as_Vector()->length() == 64 && UseAVX > 2); 3860 match(Set dst (ReplicateB con)); 3861 format %{ "movq $dst,[$constantaddress]\n\t" 3862 "vpbroadcastb $dst,$dst\t! upper replicate64B" %} 3863 ins_encode %{ 3864 int vector_len = 2; 3865 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3866 __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3867 %} 3868 ins_pipe( pipe_slow ); 3869 ins_pipe( pipe_slow ); 3870 %} 3871 3872 instruct Repl64B_zero_evex(vecZ dst, immI0 zero) %{ 3873 predicate(n->as_Vector()->length() == 64 && UseAVX > 2); 3874 match(Set dst (ReplicateB zero)); 3875 format %{ "vpxor $dst k0,$dst,$dst\t! replicate64B zero" %} 3876 ins_encode %{ 3877 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 3878 int vector_len = 2; 3879 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3880 %} 3881 ins_pipe( fpu_reg_reg ); 3882 %} 3883 3884 instruct Repl4S_evex(vecD dst, rRegI src) %{ 3885 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw()); 3886 match(Set dst (ReplicateS src)); 3887 format %{ "vpbroadcastw $dst,$src\t! replicate4S" %} 3888 ins_encode %{ 3889 int vector_len = 0; 3890 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 3891 %} 3892 ins_pipe( pipe_slow ); 3893 %} 3894 3895 instruct Repl4S_mem_evex(vecD dst, memory mem) %{ 3896 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw()); 3897 match(Set dst (ReplicateS (LoadS mem))); 3898 format %{ "vpbroadcastw $dst,$mem\t! replicate4S" %} 3899 ins_encode %{ 3900 int vector_len = 0; 3901 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 3902 %} 3903 ins_pipe( pipe_slow ); 3904 %} 3905 3906 instruct Repl8S_evex(vecX dst, rRegI src) %{ 3907 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 3908 match(Set dst (ReplicateS src)); 3909 format %{ "vpbroadcastw $dst,$src\t! replicate8S" %} 3910 ins_encode %{ 3911 int vector_len = 0; 3912 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 3913 %} 3914 ins_pipe( pipe_slow ); 3915 %} 3916 3917 instruct Repl8S_mem_evex(vecX dst, memory mem) %{ 3918 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 3919 match(Set dst (ReplicateS (LoadS mem))); 3920 format %{ "vpbroadcastw $dst,$mem\t! 
replicate8S" %} 3921 ins_encode %{ 3922 int vector_len = 0; 3923 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 3924 %} 3925 ins_pipe( pipe_slow ); 3926 %} 3927 3928 instruct Repl16S_evex(vecY dst, rRegI src) %{ 3929 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 3930 match(Set dst (ReplicateS src)); 3931 format %{ "vpbroadcastw $dst,$src\t! replicate16S" %} 3932 ins_encode %{ 3933 int vector_len = 1; 3934 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 3935 %} 3936 ins_pipe( pipe_slow ); 3937 %} 3938 3939 instruct Repl16S_mem_evex(vecY dst, memory mem) %{ 3940 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 3941 match(Set dst (ReplicateS (LoadS mem))); 3942 format %{ "vpbroadcastw $dst,$mem\t! replicate16S" %} 3943 ins_encode %{ 3944 int vector_len = 1; 3945 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 3946 %} 3947 ins_pipe( pipe_slow ); 3948 %} 3949 3950 instruct Repl32S_evex(vecZ dst, rRegI src) %{ 3951 predicate(n->as_Vector()->length() == 32 && UseAVX > 2); 3952 match(Set dst (ReplicateS src)); 3953 format %{ "vpbroadcastw $dst,$src\t! replicate32S" %} 3954 ins_encode %{ 3955 int vector_len = 2; 3956 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 3957 %} 3958 ins_pipe( pipe_slow ); 3959 %} 3960 3961 instruct Repl32S_mem_evex(vecZ dst, memory mem) %{ 3962 predicate(n->as_Vector()->length() == 32 && UseAVX > 2); 3963 match(Set dst (ReplicateS (LoadS mem))); 3964 format %{ "vpbroadcastw $dst,$mem\t! replicate32S" %} 3965 ins_encode %{ 3966 int vector_len = 2; 3967 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 3968 %} 3969 ins_pipe( pipe_slow ); 3970 %} 3971 3972 instruct Repl8S_imm_evex(vecX dst, immI con) %{ 3973 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 3974 match(Set dst (ReplicateS con)); 3975 format %{ "movq $dst,[$constantaddress]\n\t" 3976 "vpbroadcastw $dst,$dst\t! replicate8S" %} 3977 ins_encode %{ 3978 int vector_len = 0; 3979 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3980 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3981 %} 3982 ins_pipe( pipe_slow ); 3983 %} 3984 3985 instruct Repl16S_imm_evex(vecY dst, immI con) %{ 3986 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 3987 match(Set dst (ReplicateS con)); 3988 format %{ "movq $dst,[$constantaddress]\n\t" 3989 "vpbroadcastw $dst,$dst\t! replicate16S" %} 3990 ins_encode %{ 3991 int vector_len = 1; 3992 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3993 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3994 %} 3995 ins_pipe( pipe_slow ); 3996 %} 3997 3998 instruct Repl32S_imm_evex(vecZ dst, immI con) %{ 3999 predicate(n->as_Vector()->length() == 32 && UseAVX > 2); 4000 match(Set dst (ReplicateS con)); 4001 format %{ "movq $dst,[$constantaddress]\n\t" 4002 "vpbroadcastw $dst,$dst\t! replicate32S" %} 4003 ins_encode %{ 4004 int vector_len = 2; 4005 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4006 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4007 %} 4008 ins_pipe( pipe_slow ); 4009 %} 4010 4011 instruct Repl32S_zero_evex(vecZ dst, immI0 zero) %{ 4012 predicate(n->as_Vector()->length() == 32 && UseAVX > 2); 4013 match(Set dst (ReplicateS zero)); 4014 format %{ "vpxor $dst k0,$dst,$dst\t! 

instruct Repl32S_zero_evex(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 2);
  match(Set dst (ReplicateS zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate32S zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_evex(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI src));
  format %{ "vpbroadcastd $dst,$src\t! replicate4I" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "vpbroadcastd $dst,$mem\t! replicate4I" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_evex(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI src));
  format %{ "vpbroadcastd $dst,$src\t! replicate8I" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "vpbroadcastd $dst,$mem\t! replicate8I" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_evex(vecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateI src));
  format %{ "vpbroadcastd $dst,$src\t! replicate16I" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "vpbroadcastd $dst,$mem\t! replicate16I" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I_imm_evex(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t"
            "vpbroadcastd $dst,$dst\t! replicate4I" %}
  ins_encode %{
    int vector_len = 0;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
replicate8I" %} 4107 ins_encode %{ 4108 int vector_len = 1; 4109 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4110 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4111 %} 4112 ins_pipe( pipe_slow ); 4113 %} 4114 4115 instruct Repl16I_imm_evex(vecZ dst, immI con) %{ 4116 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4117 match(Set dst (ReplicateI con)); 4118 format %{ "movq $dst,[$constantaddress]\t! replicate16I($con)\n\t" 4119 "vpbroadcastd $dst,$dst\t! replicate16I" %} 4120 ins_encode %{ 4121 int vector_len = 2; 4122 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4123 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4124 %} 4125 ins_pipe( pipe_slow ); 4126 %} 4127 4128 instruct Repl16I_zero_evex(vecZ dst, immI0 zero) %{ 4129 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4130 match(Set dst (ReplicateI zero)); 4131 format %{ "vpxor $dst k0,$dst,$dst\t! replicate16I zero" %} 4132 ins_encode %{ 4133 // Use vxorpd since AVX does not have vpxor for 512-bit (AVX2 will have it). 4134 int vector_len = 2; 4135 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4136 %} 4137 ins_pipe( fpu_reg_reg ); 4138 %} 4139 4140 // Replicate long (8 byte) scalar to be vector 4141 #ifdef _LP64 4142 instruct Repl4L_evex(vecY dst, rRegL src) %{ 4143 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4144 match(Set dst (ReplicateL src)); 4145 format %{ "vpbroadcastq $dst,$src\t! replicate4L" %} 4146 ins_encode %{ 4147 int vector_len = 1; 4148 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); 4149 %} 4150 ins_pipe( pipe_slow ); 4151 %} 4152 4153 instruct Repl8L_evex(vecZ dst, rRegL src) %{ 4154 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4155 match(Set dst (ReplicateL src)); 4156 format %{ "vpbroadcastq $dst,$src\t! replicate8L" %} 4157 ins_encode %{ 4158 int vector_len = 2; 4159 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); 4160 %} 4161 ins_pipe( pipe_slow ); 4162 %} 4163 #else // _LP64 4164 instruct Repl4L_evex(vecY dst, eRegL src, regD tmp) %{ 4165 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4166 match(Set dst (ReplicateL src)); 4167 effect(TEMP dst, USE src, TEMP tmp); 4168 format %{ "movdl $dst,$src.lo\n\t" 4169 "movdl $tmp,$src.hi\n\t" 4170 "punpckldq $dst,$tmp\n\t" 4171 "vpbroadcastq $dst,$dst\t! replicate4L" %} 4172 ins_encode %{ 4173 int vector_len = 1; 4174 __ movdl($dst$$XMMRegister, $src$$Register); 4175 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4176 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4177 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4178 %} 4179 ins_pipe( pipe_slow ); 4180 %} 4181 4182 instruct Repl8L_evex(vecZ dst, eRegL src, regD tmp) %{ 4183 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4184 match(Set dst (ReplicateL src)); 4185 effect(TEMP dst, USE src, TEMP tmp); 4186 format %{ "movdl $dst,$src.lo\n\t" 4187 "movdl $tmp,$src.hi\n\t" 4188 "punpckldq $dst,$tmp\n\t" 4189 "vpbroadcastq $dst,$dst\t! 
replicate8L" %} 4190 ins_encode %{ 4191 int vector_len = 2; 4192 __ movdl($dst$$XMMRegister, $src$$Register); 4193 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4194 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4195 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4196 %} 4197 ins_pipe( pipe_slow ); 4198 %} 4199 #endif // _LP64 4200 4201 instruct Repl4L_imm_evex(vecY dst, immL con) %{ 4202 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4203 match(Set dst (ReplicateL con)); 4204 format %{ "movq $dst,[$constantaddress]\n\t" 4205 "vpbroadcastq $dst,$dst\t! replicate4L" %} 4206 ins_encode %{ 4207 int vector_len = 1; 4208 __ movq($dst$$XMMRegister, $constantaddress($con)); 4209 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4210 %} 4211 ins_pipe( pipe_slow ); 4212 %} 4213 4214 instruct Repl8L_imm_evex(vecZ dst, immL con) %{ 4215 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4216 match(Set dst (ReplicateL con)); 4217 format %{ "movq $dst,[$constantaddress]\n\t" 4218 "vpbroadcastq $dst,$dst\t! replicate8L" %} 4219 ins_encode %{ 4220 int vector_len = 2; 4221 __ movq($dst$$XMMRegister, $constantaddress($con)); 4222 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4223 %} 4224 ins_pipe( pipe_slow ); 4225 %} 4226 4227 instruct Repl2L_mem_evex(vecX dst, memory mem) %{ 4228 predicate(n->as_Vector()->length() == 2 && VM_Version::supports_avx512vl()); 4229 match(Set dst (ReplicateL (LoadL mem))); 4230 format %{ "vpbroadcastd $dst,$mem\t! replicate2L" %} 4231 ins_encode %{ 4232 int vector_len = 0; 4233 __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4234 %} 4235 ins_pipe( pipe_slow ); 4236 %} 4237 4238 instruct Repl4L_mem_evex(vecY dst, memory mem) %{ 4239 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4240 match(Set dst (ReplicateL (LoadL mem))); 4241 format %{ "vpbroadcastd $dst,$mem\t! replicate4L" %} 4242 ins_encode %{ 4243 int vector_len = 1; 4244 __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4245 %} 4246 ins_pipe( pipe_slow ); 4247 %} 4248 4249 instruct Repl8L_mem_evex(vecZ dst, memory mem) %{ 4250 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4251 match(Set dst (ReplicateL (LoadL mem))); 4252 format %{ "vpbroadcastd $dst,$mem\t! replicate8L" %} 4253 ins_encode %{ 4254 int vector_len = 2; 4255 __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4256 %} 4257 ins_pipe( pipe_slow ); 4258 %} 4259 4260 instruct Repl8L_zero_evex(vecZ dst, immL0 zero) %{ 4261 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4262 match(Set dst (ReplicateL zero)); 4263 format %{ "vpxor $dst k0,$dst,$dst\t! replicate8L zero" %} 4264 ins_encode %{ 4265 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 4266 int vector_len = 2; 4267 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4268 %} 4269 ins_pipe( fpu_reg_reg ); 4270 %} 4271 4272 instruct Repl8F_evex(vecY dst, regF src) %{ 4273 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4274 match(Set dst (ReplicateF src)); 4275 format %{ "vbroadcastss $dst,$src\t! 
replicate8F" %} 4276 ins_encode %{ 4277 int vector_len = 1; 4278 __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4279 %} 4280 ins_pipe( pipe_slow ); 4281 %} 4282 4283 instruct Repl8F_mem_evex(vecY dst, memory mem) %{ 4284 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4285 match(Set dst (ReplicateF (LoadF mem))); 4286 format %{ "vbroadcastss $dst,$mem\t! replicate8F" %} 4287 ins_encode %{ 4288 int vector_len = 1; 4289 __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); 4290 %} 4291 ins_pipe( pipe_slow ); 4292 %} 4293 4294 instruct Repl16F_evex(vecZ dst, regF src) %{ 4295 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4296 match(Set dst (ReplicateF src)); 4297 format %{ "vbroadcastss $dst,$src\t! replicate16F" %} 4298 ins_encode %{ 4299 int vector_len = 2; 4300 __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4301 %} 4302 ins_pipe( pipe_slow ); 4303 %} 4304 4305 instruct Repl16F_mem_evex(vecZ dst, memory mem) %{ 4306 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4307 match(Set dst (ReplicateF (LoadF mem))); 4308 format %{ "vbroadcastss $dst,$mem\t! replicate16F" %} 4309 ins_encode %{ 4310 int vector_len = 2; 4311 __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); 4312 %} 4313 ins_pipe( pipe_slow ); 4314 %} 4315 4316 instruct Repl16F_zero_evex(vecZ dst, immF0 zero) %{ 4317 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4318 match(Set dst (ReplicateF zero)); 4319 format %{ "vxorps $dst k0,$dst,$dst\t! replicate16F zero" %} 4320 ins_encode %{ 4321 int vector_len = 2; 4322 __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4323 %} 4324 ins_pipe( fpu_reg_reg ); 4325 %} 4326 4327 instruct Repl4D_evex(vecY dst, regD src) %{ 4328 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4329 match(Set dst (ReplicateD src)); 4330 format %{ "vbroadcastsd $dst,$src\t! replicate4D" %} 4331 ins_encode %{ 4332 int vector_len = 1; 4333 __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4334 %} 4335 ins_pipe( pipe_slow ); 4336 %} 4337 4338 instruct Repl4D_mem_evex(vecY dst, memory mem) %{ 4339 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4340 match(Set dst (ReplicateD (LoadD mem))); 4341 format %{ "vbroadcastsd $dst,$mem\t! replicate4D" %} 4342 ins_encode %{ 4343 int vector_len = 1; 4344 __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); 4345 %} 4346 ins_pipe( pipe_slow ); 4347 %} 4348 4349 instruct Repl8D_evex(vecZ dst, regD src) %{ 4350 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4351 match(Set dst (ReplicateD src)); 4352 format %{ "vbroadcastsd $dst,$src\t! replicate8D" %} 4353 ins_encode %{ 4354 int vector_len = 2; 4355 __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4356 %} 4357 ins_pipe( pipe_slow ); 4358 %} 4359 4360 instruct Repl8D_mem_evex(vecZ dst, memory mem) %{ 4361 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4362 match(Set dst (ReplicateD (LoadD mem))); 4363 format %{ "vbroadcastsd $dst,$mem\t! replicate8D" %} 4364 ins_encode %{ 4365 int vector_len = 2; 4366 __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); 4367 %} 4368 ins_pipe( pipe_slow ); 4369 %} 4370 4371 instruct Repl8D_zero_evex(vecZ dst, immD0 zero) %{ 4372 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4373 match(Set dst (ReplicateD zero)); 4374 format %{ "vxorpd $dst k0,$dst,$dst,vect512\t! 
instruct Repl8D_zero_evex(vecZ dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateD zero));
  format %{ "vxorpd $dst k0,$dst,$dst,vect512\t! replicate8D zero" %}
  ins_encode %{
    int vector_len = 2;
    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// ====================REDUCTION ARITHMETIC=======================================

instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseSSE > 2 && UseAVX == 0);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp2, TEMP tmp);
  format %{ "movdqu $tmp2,$src2\n\t"
            "phaddd $tmp2,$tmp2\n\t"
            "movd $tmp,$src1\n\t"
            "paddd $tmp,$tmp2\n\t"
            "movd $dst,$tmp\t! add reduction2I" %}
  ins_encode %{
    __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0 && UseAVX < 3);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vphaddd $tmp,$src2,$src2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! add reduction2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0x1\n\t"
            "vpaddd $tmp,$src2,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! add reduction2I" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
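// Illustrative sketch (comment only, not part of the AD grammar): every
// AddReductionVI rule in this section computes the same scalar result; only the
// shuffle/add ladder that collapses the lanes differs per vector width and ISA
// level. Function and parameter names are assumptions of this sketch:
//
//   // scalar meaning of (AddReductionVI src1 src2), N = lane count of src2
//   int add_reduction_vi(int src1, const int v[], int N) {
//     int sum = src1;
//     for (int i = 0; i < N; i++) sum += v[i];
//     return sum;
//   }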
add reduction4I" %} 4451 ins_encode %{ 4452 __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister); 4453 __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); 4454 __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); 4455 __ movdl($tmp$$XMMRegister, $src1$$Register); 4456 __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister); 4457 __ movdl($dst$$Register, $tmp$$XMMRegister); 4458 %} 4459 ins_pipe( pipe_slow ); 4460 %} 4461 4462 instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4463 predicate(UseAVX > 0 && UseAVX < 3); 4464 match(Set dst (AddReductionVI src1 src2)); 4465 effect(TEMP tmp, TEMP tmp2); 4466 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4467 "vphaddd $tmp,$tmp,$tmp2\n\t" 4468 "movd $tmp2,$src1\n\t" 4469 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4470 "movd $dst,$tmp2\t! add reduction4I" %} 4471 ins_encode %{ 4472 int vector_len = 0; 4473 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4474 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4475 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4476 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 4477 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4478 %} 4479 ins_pipe( pipe_slow ); 4480 %} 4481 4482 instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4483 predicate(UseAVX > 2); 4484 match(Set dst (AddReductionVI src1 src2)); 4485 effect(TEMP tmp, TEMP tmp2); 4486 format %{ "pshufd $tmp2,$src2,0xE\n\t" 4487 "vpaddd $tmp,$src2,$tmp2\n\t" 4488 "pshufd $tmp2,$tmp,0x1\n\t" 4489 "vpaddd $tmp,$tmp,$tmp2\n\t" 4490 "movd $tmp2,$src1\n\t" 4491 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4492 "movd $dst,$tmp2\t! add reduction4I" %} 4493 ins_encode %{ 4494 int vector_len = 0; 4495 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 4496 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4497 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4498 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4499 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4500 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4501 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4502 %} 4503 ins_pipe( pipe_slow ); 4504 %} 4505 4506 instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 4507 predicate(UseAVX > 0 && UseAVX < 3); 4508 match(Set dst (AddReductionVI src1 src2)); 4509 effect(TEMP tmp, TEMP tmp2); 4510 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4511 "vphaddd $tmp,$tmp,$tmp2\n\t" 4512 "vextracti128 $tmp2,$tmp\n\t" 4513 "vpaddd $tmp,$tmp,$tmp2\n\t" 4514 "movd $tmp2,$src1\n\t" 4515 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4516 "movd $dst,$tmp2\t! 
add reduction8I" %} 4517 ins_encode %{ 4518 int vector_len = 1; 4519 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4520 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4521 __ vextracti128h($tmp2$$XMMRegister, $tmp$$XMMRegister); 4522 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4523 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4524 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4525 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4526 %} 4527 ins_pipe( pipe_slow ); 4528 %} 4529 4530 instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 4531 predicate(UseAVX > 2); 4532 match(Set dst (AddReductionVI src1 src2)); 4533 effect(TEMP tmp, TEMP tmp2); 4534 format %{ "vextracti128 $tmp,$src2\n\t" 4535 "vpaddd $tmp,$tmp,$src2\n\t" 4536 "pshufd $tmp2,$tmp,0xE\n\t" 4537 "vpaddd $tmp,$tmp,$tmp2\n\t" 4538 "pshufd $tmp2,$tmp,0x1\n\t" 4539 "vpaddd $tmp,$tmp,$tmp2\n\t" 4540 "movd $tmp2,$src1\n\t" 4541 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4542 "movd $dst,$tmp2\t! add reduction8I" %} 4543 ins_encode %{ 4544 int vector_len = 0; 4545 __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister); 4546 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 4547 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 4548 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4549 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4550 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4551 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4552 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4553 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4554 %} 4555 ins_pipe( pipe_slow ); 4556 %} 4557 4558 instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 4559 predicate(UseAVX > 2); 4560 match(Set dst (AddReductionVI src1 src2)); 4561 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 4562 format %{ "vextracti64x4 $tmp3,$src2\n\t" 4563 "vpaddd $tmp3,$tmp3,$src2\n\t" 4564 "vextracti128 $tmp,$tmp3\n\t" 4565 "vpaddd $tmp,$tmp,$tmp3\n\t" 4566 "pshufd $tmp2,$tmp,0xE\n\t" 4567 "vpaddd $tmp,$tmp,$tmp2\n\t" 4568 "pshufd $tmp2,$tmp,0x1\n\t" 4569 "vpaddd $tmp,$tmp,$tmp2\n\t" 4570 "movd $tmp2,$src1\n\t" 4571 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4572 "movd $dst,$tmp2\t! 
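// The EVEX ladders above halve the live lanes at each step: extract the upper
// half, add it onto the lower half, and repeat until one lane remains. A hedged
// C++ sketch of that pattern (illustration only; names are assumptions):
//
//   int halve_and_add(int v[], int N) {          // N is a power of two
//     for (int half = N / 2; half >= 1; half /= 2)
//       for (int i = 0; i < half; i++)
//         v[i] += v[i + half];                   // one vextract*/pshufd + vpaddd per step
//     return v[0];
//   }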
mul reduction16I" %} 4573 ins_encode %{ 4574 __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister); 4575 __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 4576 __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister); 4577 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 4578 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 4579 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4580 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4581 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4582 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4583 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4584 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4585 %} 4586 ins_pipe( pipe_slow ); 4587 %} 4588 4589 #ifdef _LP64 4590 instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ 4591 predicate(UseAVX > 2); 4592 match(Set dst (AddReductionVL src1 src2)); 4593 effect(TEMP tmp, TEMP tmp2); 4594 format %{ "pshufd $tmp2,$src2,0xE\n\t" 4595 "vpaddq $tmp,$src2,$tmp2\n\t" 4596 "movdq $tmp2,$src1\n\t" 4597 "vpaddq $tmp2,$tmp,$tmp2\n\t" 4598 "movdq $dst,$tmp2\t! add reduction2L" %} 4599 ins_encode %{ 4600 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 4601 __ vpaddq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 4602 __ movdq($tmp2$$XMMRegister, $src1$$Register); 4603 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4604 __ movdq($dst$$Register, $tmp2$$XMMRegister); 4605 %} 4606 ins_pipe( pipe_slow ); 4607 %} 4608 4609 instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ 4610 predicate(UseAVX > 2); 4611 match(Set dst (AddReductionVL src1 src2)); 4612 effect(TEMP tmp, TEMP tmp2); 4613 format %{ "vextracti64x2 $tmp,$src2, 0x1\n\t" 4614 "vpaddq $tmp2,$tmp,$src2\n\t" 4615 "pshufd $tmp,$tmp2,0xE\n\t" 4616 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4617 "movdq $tmp,$src1\n\t" 4618 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4619 "movdq $dst,$tmp2\t! add reduction4L" %} 4620 ins_encode %{ 4621 __ vextracti64x2h($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); 4622 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 4623 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 4624 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4625 __ movdq($tmp$$XMMRegister, $src1$$Register); 4626 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4627 __ movdq($dst$$Register, $tmp2$$XMMRegister); 4628 %} 4629 ins_pipe( pipe_slow ); 4630 %} 4631 4632 instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ 4633 predicate(UseAVX > 2); 4634 match(Set dst (AddReductionVL src1 src2)); 4635 effect(TEMP tmp, TEMP tmp2); 4636 format %{ "vextracti64x4 $tmp2,$src2\n\t" 4637 "vpaddq $tmp2,$tmp2,$src2\n\t" 4638 "vextracti128 $tmp,$tmp2\n\t" 4639 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4640 "pshufd $tmp,$tmp2,0xE\n\t" 4641 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4642 "movdq $tmp,$src1\n\t" 4643 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4644 "movdq $dst,$tmp2\t! 
add reduction8L" %} 4645 ins_encode %{ 4646 __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister); 4647 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 4648 __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister); 4649 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4650 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 4651 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4652 __ movdq($tmp$$XMMRegister, $src1$$Register); 4653 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4654 __ movdq($dst$$Register, $tmp2$$XMMRegister); 4655 %} 4656 ins_pipe( pipe_slow ); 4657 %} 4658 #endif 4659 4660 instruct rsadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{ 4661 predicate(UseSSE >= 1 && UseAVX == 0); 4662 match(Set dst (AddReductionVF src1 src2)); 4663 effect(TEMP tmp, TEMP tmp2); 4664 format %{ "movdqu $tmp,$src1\n\t" 4665 "addss $tmp,$src2\n\t" 4666 "pshufd $tmp2,$src2,0x01\n\t" 4667 "addss $tmp,$tmp2\n\t" 4668 "movdqu $dst,$tmp\t! add reduction2F" %} 4669 ins_encode %{ 4670 __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); 4671 __ addss($tmp$$XMMRegister, $src2$$XMMRegister); 4672 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01); 4673 __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister); 4674 __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister); 4675 %} 4676 ins_pipe( pipe_slow ); 4677 %} 4678 4679 instruct rvadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{ 4680 predicate(UseAVX > 0); 4681 match(Set dst (AddReductionVF src1 src2)); 4682 effect(TEMP tmp2, TEMP tmp); 4683 format %{ "vaddss $tmp2,$src1,$src2\n\t" 4684 "pshufd $tmp,$src2,0x01\n\t" 4685 "vaddss $dst,$tmp2,$tmp\t! add reduction2F" %} 4686 ins_encode %{ 4687 __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 4688 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4689 __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4690 %} 4691 ins_pipe( pipe_slow ); 4692 %} 4693 4694 instruct rsadd4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{ 4695 predicate(UseSSE >= 1 && UseAVX == 0); 4696 match(Set dst (AddReductionVF src1 src2)); 4697 effect(TEMP tmp, TEMP tmp2); 4698 format %{ "movdqu $tmp,$src1\n\t" 4699 "addss $tmp,$src2\n\t" 4700 "pshufd $tmp2,$src2,0x01\n\t" 4701 "addss $tmp,$tmp2\n\t" 4702 "pshufd $tmp2,$src2,0x02\n\t" 4703 "addss $tmp,$tmp2\n\t" 4704 "pshufd $tmp2,$src2,0x03\n\t" 4705 "addss $tmp,$tmp2\n\t" 4706 "movdqu $dst,$tmp\t! 
add reduction4F" %} 4707 ins_encode %{ 4708 __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); 4709 __ addss($tmp$$XMMRegister, $src2$$XMMRegister); 4710 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01); 4711 __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister); 4712 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02); 4713 __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister); 4714 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03); 4715 __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister); 4716 __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister); 4717 %} 4718 ins_pipe( pipe_slow ); 4719 %} 4720 4721 instruct rvadd4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{ 4722 predicate(UseAVX > 0); 4723 match(Set dst (AddReductionVF src1 src2)); 4724 effect(TEMP tmp, TEMP tmp2); 4725 format %{ "vaddss $tmp2,$src1,$src2\n\t" 4726 "pshufd $tmp,$src2,0x01\n\t" 4727 "vaddss $tmp2,$tmp2,$tmp\n\t" 4728 "pshufd $tmp,$src2,0x02\n\t" 4729 "vaddss $tmp2,$tmp2,$tmp\n\t" 4730 "pshufd $tmp,$src2,0x03\n\t" 4731 "vaddss $dst,$tmp2,$tmp\t! add reduction4F" %} 4732 ins_encode %{ 4733 __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 4734 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4735 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4736 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 4737 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4738 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 4739 __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4740 %} 4741 ins_pipe( pipe_slow ); 4742 %} 4743 4744 instruct radd8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{ 4745 predicate(UseAVX > 0); 4746 match(Set dst (AddReductionVF src1 src2)); 4747 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 4748 format %{ "vaddss $tmp2,$src1,$src2\n\t" 4749 "pshufd $tmp,$src2,0x01\n\t" 4750 "vaddss $tmp2,$tmp2,$tmp\n\t" 4751 "pshufd $tmp,$src2,0x02\n\t" 4752 "vaddss $tmp2,$tmp2,$tmp\n\t" 4753 "pshufd $tmp,$src2,0x03\n\t" 4754 "vaddss $tmp2,$tmp2,$tmp\n\t" 4755 "vextractf128 $tmp3,$src2\n\t" 4756 "vaddss $tmp2,$tmp2,$tmp3\n\t" 4757 "pshufd $tmp,$tmp3,0x01\n\t" 4758 "vaddss $tmp2,$tmp2,$tmp\n\t" 4759 "pshufd $tmp,$tmp3,0x02\n\t" 4760 "vaddss $tmp2,$tmp2,$tmp\n\t" 4761 "pshufd $tmp,$tmp3,0x03\n\t" 4762 "vaddss $dst,$tmp2,$tmp\t! 
add reduction8F" %} 4763 ins_encode %{ 4764 __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 4765 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4766 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4767 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 4768 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4769 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 4770 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4771 __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister); 4772 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 4773 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); 4774 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4775 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); 4776 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4777 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); 4778 __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4779 %} 4780 ins_pipe( pipe_slow ); 4781 %} 4782 4783 instruct radd16F_reduction_reg(regF dst, regF src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 4784 predicate(UseAVX > 2); 4785 match(Set dst (AddReductionVF src1 src2)); 4786 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 4787 format %{ "vaddss $tmp2,$src1,$src2\n\t" 4788 "pshufd $tmp,$src2,0x01\n\t" 4789 "vaddss $tmp2,$tmp2,$tmp\n\t" 4790 "pshufd $tmp,$src2,0x02\n\t" 4791 "vaddss $tmp2,$tmp2,$tmp\n\t" 4792 "pshufd $tmp,$src2,0x03\n\t" 4793 "vaddss $tmp2,$tmp2,$tmp\n\t" 4794 "vextractf64x2 $tmp3,$src2, 0x1\n\t" 4795 "vaddss $tmp2,$tmp2,$tmp3\n\t" 4796 "pshufd $tmp,$tmp3,0x01\n\t" 4797 "vaddss $tmp2,$tmp2,$tmp\n\t" 4798 "pshufd $tmp,$tmp3,0x02\n\t" 4799 "vaddss $tmp2,$tmp2,$tmp\n\t" 4800 "pshufd $tmp,$tmp3,0x03\n\t" 4801 "vaddss $tmp2,$tmp2,$tmp\n\t" 4802 "vextractf64x2 $tmp3,$src2, 0x2\n\t" 4803 "vaddss $tmp2,$tmp2,$tmp3\n\t" 4804 "pshufd $tmp,$tmp3,0x01\n\t" 4805 "vaddss $tmp2,$tmp2,$tmp\n\t" 4806 "pshufd $tmp,$tmp3,0x02\n\t" 4807 "vaddss $tmp2,$tmp2,$tmp\n\t" 4808 "pshufd $tmp,$tmp3,0x03\n\t" 4809 "vaddss $tmp2,$tmp2,$tmp\n\t" 4810 "vextractf64x2 $tmp3,$src2, 0x3\n\t" 4811 "vaddss $tmp2,$tmp2,$tmp3\n\t" 4812 "pshufd $tmp,$tmp3,0x01\n\t" 4813 "vaddss $tmp2,$tmp2,$tmp\n\t" 4814 "pshufd $tmp,$tmp3,0x02\n\t" 4815 "vaddss $tmp2,$tmp2,$tmp\n\t" 4816 "pshufd $tmp,$tmp3,0x03\n\t" 4817 "vaddss $dst,$tmp2,$tmp\t! 
add reduction16F" %} 4818 ins_encode %{ 4819 __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 4820 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4821 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4822 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 4823 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4824 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 4825 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4826 __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1); 4827 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 4828 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); 4829 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4830 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); 4831 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4832 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); 4833 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4834 __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2); 4835 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 4836 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); 4837 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4838 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); 4839 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4840 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); 4841 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4842 __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3); 4843 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 4844 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); 4845 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4846 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); 4847 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4848 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); 4849 __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4850 %} 4851 ins_pipe( pipe_slow ); 4852 %} 4853 4854 instruct rsadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{ 4855 predicate(UseSSE >= 1 && UseAVX == 0); 4856 match(Set dst (AddReductionVD src1 src2)); 4857 effect(TEMP tmp, TEMP dst); 4858 format %{ "movdqu $tmp,$src1\n\t" 4859 "addsd $tmp,$src2\n\t" 4860 "pshufd $dst,$src2,0xE\n\t" 4861 "addsd $dst,$tmp\t! add reduction2D" %} 4862 ins_encode %{ 4863 __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); 4864 __ addsd($tmp$$XMMRegister, $src2$$XMMRegister); 4865 __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE); 4866 __ addsd($dst$$XMMRegister, $tmp$$XMMRegister); 4867 %} 4868 ins_pipe( pipe_slow ); 4869 %} 4870 4871 instruct rvadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{ 4872 predicate(UseAVX > 0); 4873 match(Set dst (AddReductionVD src1 src2)); 4874 effect(TEMP tmp, TEMP tmp2); 4875 format %{ "vaddsd $tmp2,$src1,$src2\n\t" 4876 "pshufd $tmp,$src2,0xE\n\t" 4877 "vaddsd $dst,$tmp2,$tmp\t! 
add reduction2D" %} 4878 ins_encode %{ 4879 __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 4880 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 4881 __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4882 %} 4883 ins_pipe( pipe_slow ); 4884 %} 4885 4886 instruct rvadd4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{ 4887 predicate(UseAVX > 0); 4888 match(Set dst (AddReductionVD src1 src2)); 4889 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 4890 format %{ "vaddsd $tmp2,$src1,$src2\n\t" 4891 "pshufd $tmp,$src2,0xE\n\t" 4892 "vaddsd $tmp2,$tmp2,$tmp\n\t" 4893 "vextractf128 $tmp3,$src2\n\t" 4894 "vaddsd $tmp2,$tmp2,$tmp3\n\t" 4895 "pshufd $tmp,$tmp3,0xE\n\t" 4896 "vaddsd $dst,$tmp2,$tmp\t! add reduction4D" %} 4897 ins_encode %{ 4898 __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 4899 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 4900 __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4901 __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister); 4902 __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 4903 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); 4904 __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4905 %} 4906 ins_pipe( pipe_slow ); 4907 %} 4908 4909 instruct rvadd8D_reduction_reg(regD dst, regD src1, vecZ src2, regD tmp, regD tmp2, regD tmp3) %{ 4910 predicate(UseAVX > 2); 4911 match(Set dst (AddReductionVD src1 src2)); 4912 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 4913 format %{ "vaddsd $tmp2,$src1,$src2\n\t" 4914 "pshufd $tmp,$src2,0xE\n\t" 4915 "vaddsd $tmp2,$tmp2,$tmp\n\t" 4916 "vextractf64x2 $tmp3,$src2, 0x1\n\t" 4917 "vaddsd $tmp2,$tmp2,$tmp3\n\t" 4918 "pshufd $tmp,$tmp3,0xE\n\t" 4919 "vaddsd $tmp2,$tmp2,$tmp\n\t" 4920 "vextractf64x2 $tmp3,$src2, 0x2\n\t" 4921 "vaddsd $tmp2,$tmp2,$tmp3\n\t" 4922 "pshufd $tmp,$tmp3,0xE\n\t" 4923 "vaddsd $tmp2,$tmp2,$tmp\n\t" 4924 "vextractf64x2 $tmp3,$src2, 0x3\n\t" 4925 "vaddsd $tmp2,$tmp2,$tmp3\n\t" 4926 "pshufd $tmp,$tmp3,0xE\n\t" 4927 "vaddsd $dst,$tmp2,$tmp\t! 
add reduction8D" %} 4928 ins_encode %{ 4929 __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 4930 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 4931 __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4932 __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1); 4933 __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 4934 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); 4935 __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4936 __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2); 4937 __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 4938 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); 4939 __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4940 __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3); 4941 __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 4942 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); 4943 __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4944 %} 4945 ins_pipe( pipe_slow ); 4946 %} 4947 4948 instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4949 predicate(UseSSE > 3 && UseAVX == 0); 4950 match(Set dst (MulReductionVI src1 src2)); 4951 effect(TEMP tmp, TEMP tmp2); 4952 format %{ "pshufd $tmp2,$src2,0x1\n\t" 4953 "pmulld $tmp2,$src2\n\t" 4954 "movd $tmp,$src1\n\t" 4955 "pmulld $tmp2,$tmp\n\t" 4956 "movd $dst,$tmp2\t! mul reduction2I" %} 4957 ins_encode %{ 4958 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 4959 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 4960 __ movdl($tmp$$XMMRegister, $src1$$Register); 4961 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 4962 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4963 %} 4964 ins_pipe( pipe_slow ); 4965 %} 4966 4967 instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4968 predicate(UseAVX > 0); 4969 match(Set dst (MulReductionVI src1 src2)); 4970 effect(TEMP tmp, TEMP tmp2); 4971 format %{ "pshufd $tmp2,$src2,0x1\n\t" 4972 "vpmulld $tmp,$src2,$tmp2\n\t" 4973 "movd $tmp2,$src1\n\t" 4974 "vpmulld $tmp2,$tmp,$tmp2\n\t" 4975 "movd $dst,$tmp2\t! mul reduction2I" %} 4976 ins_encode %{ 4977 int vector_len = 0; 4978 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 4979 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4980 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4981 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4982 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4983 %} 4984 ins_pipe( pipe_slow ); 4985 %} 4986 4987 instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4988 predicate(UseSSE > 3 && UseAVX == 0); 4989 match(Set dst (MulReductionVI src1 src2)); 4990 effect(TEMP tmp, TEMP tmp2); 4991 format %{ "pshufd $tmp2,$src2,0xE\n\t" 4992 "pmulld $tmp2,$src2\n\t" 4993 "pshufd $tmp,$tmp2,0x1\n\t" 4994 "pmulld $tmp2,$tmp\n\t" 4995 "movd $tmp,$src1\n\t" 4996 "pmulld $tmp2,$tmp\n\t" 4997 "movd $dst,$tmp2\t! 
mul reduction4I" %} 4998 ins_encode %{ 4999 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5000 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 5001 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1); 5002 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5003 __ movdl($tmp$$XMMRegister, $src1$$Register); 5004 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5005 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5006 %} 5007 ins_pipe( pipe_slow ); 5008 %} 5009 5010 instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 5011 predicate(UseAVX > 0); 5012 match(Set dst (MulReductionVI src1 src2)); 5013 effect(TEMP tmp, TEMP tmp2); 5014 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5015 "vpmulld $tmp,$src2,$tmp2\n\t" 5016 "pshufd $tmp2,$tmp,0x1\n\t" 5017 "vpmulld $tmp,$tmp,$tmp2\n\t" 5018 "movd $tmp2,$src1\n\t" 5019 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5020 "movd $dst,$tmp2\t! mul reduction4I" %} 5021 ins_encode %{ 5022 int vector_len = 0; 5023 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5024 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5025 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5026 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5027 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5028 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5029 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5030 %} 5031 ins_pipe( pipe_slow ); 5032 %} 5033 5034 instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 5035 predicate(UseAVX > 0); 5036 match(Set dst (MulReductionVI src1 src2)); 5037 effect(TEMP tmp, TEMP tmp2); 5038 format %{ "vextracti128 $tmp,$src2\n\t" 5039 "vpmulld $tmp,$tmp,$src2\n\t" 5040 "pshufd $tmp2,$tmp,0xE\n\t" 5041 "vpmulld $tmp,$tmp,$tmp2\n\t" 5042 "pshufd $tmp2,$tmp,0x1\n\t" 5043 "vpmulld $tmp,$tmp,$tmp2\n\t" 5044 "movd $tmp2,$src1\n\t" 5045 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5046 "movd $dst,$tmp2\t! mul reduction8I" %} 5047 ins_encode %{ 5048 int vector_len = 0; 5049 __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister); 5050 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 5051 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5052 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5053 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5054 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5055 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5056 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5057 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5058 %} 5059 ins_pipe( pipe_slow ); 5060 %} 5061 5062 instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 5063 predicate(UseAVX > 2); 5064 match(Set dst (MulReductionVI src1 src2)); 5065 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5066 format %{ "vextracti64x4 $tmp3,$src2\n\t" 5067 "vpmulld $tmp3,$tmp3,$src2\n\t" 5068 "vextracti128 $tmp,$tmp3\n\t" 5069 "vpmulld $tmp,$tmp,$src2\n\t" 5070 "pshufd $tmp2,$tmp,0xE\n\t" 5071 "vpmulld $tmp,$tmp,$tmp2\n\t" 5072 "pshufd $tmp2,$tmp,0x1\n\t" 5073 "vpmulld $tmp,$tmp,$tmp2\n\t" 5074 "movd $tmp2,$src1\n\t" 5075 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5076 "movd $dst,$tmp2\t! 
mul reduction16I" %} 5077 ins_encode %{ 5078 __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister); 5079 __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 5080 __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister); 5081 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 5082 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5083 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5084 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5085 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5086 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5087 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5088 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5089 %} 5090 ins_pipe( pipe_slow ); 5091 %} 5092 5093 #ifdef _LP64 5094 instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ 5095 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5096 match(Set dst (MulReductionVL src1 src2)); 5097 effect(TEMP tmp, TEMP tmp2); 5098 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5099 "vpmullq $tmp,$src2,$tmp2\n\t" 5100 "movdq $tmp2,$src1\n\t" 5101 "vpmullq $tmp2,$tmp,$tmp2\n\t" 5102 "movdq $dst,$tmp2\t! mul reduction2L" %} 5103 ins_encode %{ 5104 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5105 __ vpmullq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 5106 __ movdq($tmp2$$XMMRegister, $src1$$Register); 5107 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5108 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5109 %} 5110 ins_pipe( pipe_slow ); 5111 %} 5112 5113 instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ 5114 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5115 match(Set dst (MulReductionVL src1 src2)); 5116 effect(TEMP tmp, TEMP tmp2); 5117 format %{ "vextracti64x2 $tmp,$src2, 0x1\n\t" 5118 "vpmullq $tmp2,$tmp,$src2\n\t" 5119 "pshufd $tmp,$tmp2,0xE\n\t" 5120 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5121 "movdq $tmp,$src1\n\t" 5122 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5123 "movdq $dst,$tmp2\t! mul reduction4L" %} 5124 ins_encode %{ 5125 __ vextracti64x2h($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); 5126 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 5127 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5128 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5129 __ movdq($tmp$$XMMRegister, $src1$$Register); 5130 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5131 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5132 %} 5133 ins_pipe( pipe_slow ); 5134 %} 5135 5136 instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ 5137 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5138 match(Set dst (MulReductionVL src1 src2)); 5139 effect(TEMP tmp, TEMP tmp2); 5140 format %{ "vextracti64x4 $tmp2,$src2\n\t" 5141 "vpmullq $tmp2,$tmp2,$src2\n\t" 5142 "vextracti128 $tmp,$tmp2\n\t" 5143 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5144 "pshufd $tmp,$tmp2,0xE\n\t" 5145 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5146 "movdq $tmp,$src1\n\t" 5147 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5148 "movdq $dst,$tmp2\t! 
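// The long multiply reductions are gated on avx512dq because vpmullq (a full
// 64x64->64 lane multiply) only exists with AVX-512DQ; without it there is no
// single-instruction 64-bit vector multiply to build the ladder from. Scalar
// meaning, for illustration only (names are assumptions of this sketch):
//
//   long mul_reduction_vl(long src1, const long v[], int N) {
//     long prod = src1;
//     for (int i = 0; i < N; i++) prod *= v[i];  // each step maps to one vpmullq
//     return prod;
//   }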
mul reduction8L" %} 5149 ins_encode %{ 5150 __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister); 5151 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 5152 __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister); 5153 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5154 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5155 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5156 __ movdq($tmp$$XMMRegister, $src1$$Register); 5157 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5158 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5159 %} 5160 ins_pipe( pipe_slow ); 5161 %} 5162 #endif 5163 5164 instruct rsmul2F_reduction(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{ 5165 predicate(UseSSE >= 1 && UseAVX == 0); 5166 match(Set dst (MulReductionVF src1 src2)); 5167 effect(TEMP tmp, TEMP tmp2); 5168 format %{ "movdqu $tmp,$src1\n\t" 5169 "mulss $tmp,$src2\n\t" 5170 "pshufd $tmp2,$src2,0x01\n\t" 5171 "mulss $tmp,$tmp2\n\t" 5172 "movdqu $dst,$tmp\t! mul reduction2F" %} 5173 ins_encode %{ 5174 __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); 5175 __ mulss($tmp$$XMMRegister, $src2$$XMMRegister); 5176 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01); 5177 __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister); 5178 __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister); 5179 %} 5180 ins_pipe( pipe_slow ); 5181 %} 5182 5183 instruct rvmul2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{ 5184 predicate(UseAVX > 0); 5185 match(Set dst (MulReductionVF src1 src2)); 5186 effect(TEMP tmp, TEMP tmp2); 5187 format %{ "vmulss $tmp2,$src1,$src2\n\t" 5188 "pshufd $tmp,$src2,0x01\n\t" 5189 "vmulss $dst,$tmp2,$tmp\t! mul reduction2F" %} 5190 ins_encode %{ 5191 __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 5192 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5193 __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5194 %} 5195 ins_pipe( pipe_slow ); 5196 %} 5197 5198 instruct rsmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{ 5199 predicate(UseSSE >= 1 && UseAVX == 0); 5200 match(Set dst (MulReductionVF src1 src2)); 5201 effect(TEMP tmp, TEMP tmp2); 5202 format %{ "movdqu $tmp,$src1\n\t" 5203 "mulss $tmp,$src2\n\t" 5204 "pshufd $tmp2,$src2,0x01\n\t" 5205 "mulss $tmp,$tmp2\n\t" 5206 "pshufd $tmp2,$src2,0x02\n\t" 5207 "mulss $tmp,$tmp2\n\t" 5208 "pshufd $tmp2,$src2,0x03\n\t" 5209 "mulss $tmp,$tmp2\n\t" 5210 "movdqu $dst,$tmp\t! 
mul reduction4F" %} 5211 ins_encode %{ 5212 __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); 5213 __ mulss($tmp$$XMMRegister, $src2$$XMMRegister); 5214 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01); 5215 __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister); 5216 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02); 5217 __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister); 5218 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03); 5219 __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister); 5220 __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister); 5221 %} 5222 ins_pipe( pipe_slow ); 5223 %} 5224 5225 instruct rvmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{ 5226 predicate(UseAVX > 0); 5227 match(Set dst (MulReductionVF src1 src2)); 5228 effect(TEMP tmp, TEMP tmp2); 5229 format %{ "vmulss $tmp2,$src1,$src2\n\t" 5230 "pshufd $tmp,$src2,0x01\n\t" 5231 "vmulss $tmp2,$tmp2,$tmp\n\t" 5232 "pshufd $tmp,$src2,0x02\n\t" 5233 "vmulss $tmp2,$tmp2,$tmp\n\t" 5234 "pshufd $tmp,$src2,0x03\n\t" 5235 "vmulss $dst,$tmp2,$tmp\t! mul reduction4F" %} 5236 ins_encode %{ 5237 __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 5238 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5239 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5240 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5241 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5242 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5243 __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5244 %} 5245 ins_pipe( pipe_slow ); 5246 %} 5247 5248 instruct rvmul8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{ 5249 predicate(UseAVX > 0); 5250 match(Set dst (MulReductionVF src1 src2)); 5251 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5252 format %{ "vmulss $tmp2,$src1,$src2\n\t" 5253 "pshufd $tmp,$src2,0x01\n\t" 5254 "vmulss $tmp2,$tmp2,$tmp\n\t" 5255 "pshufd $tmp,$src2,0x02\n\t" 5256 "vmulss $tmp2,$tmp2,$tmp\n\t" 5257 "pshufd $tmp,$src2,0x03\n\t" 5258 "vmulss $tmp2,$tmp2,$tmp\n\t" 5259 "vextractf128 $tmp3,$src2\n\t" 5260 "vmulss $tmp2,$tmp2,$tmp3\n\t" 5261 "pshufd $tmp,$tmp3,0x01\n\t" 5262 "vmulss $tmp2,$tmp2,$tmp\n\t" 5263 "pshufd $tmp,$tmp3,0x02\n\t" 5264 "vmulss $tmp2,$tmp2,$tmp\n\t" 5265 "pshufd $tmp,$tmp3,0x03\n\t" 5266 "vmulss $dst,$tmp2,$tmp\t! 
mul reduction8F" %} 5267 ins_encode %{ 5268 __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 5269 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5270 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5271 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5272 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5273 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5274 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5275 __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister); 5276 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 5277 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); 5278 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5279 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); 5280 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5281 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); 5282 __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5283 %} 5284 ins_pipe( pipe_slow ); 5285 %} 5286 5287 instruct rvmul16F_reduction_reg(regF dst, regF src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 5288 predicate(UseAVX > 2); 5289 match(Set dst (MulReductionVF src1 src2)); 5290 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5291 format %{ "vmulss $tmp2,$src1,$src2\n\t" 5292 "pshufd $tmp,$src2,0x01\n\t" 5293 "vmulss $tmp2,$tmp2,$tmp\n\t" 5294 "pshufd $tmp,$src2,0x02\n\t" 5295 "vmulss $tmp2,$tmp2,$tmp\n\t" 5296 "pshufd $tmp,$src2,0x03\n\t" 5297 "vmulss $tmp2,$tmp2,$tmp\n\t" 5298 "vextractf32x4 $tmp3,$src2, 0x1\n\t" 5299 "vmulss $tmp2,$tmp2,$tmp3\n\t" 5300 "pshufd $tmp,$tmp3,0x01\n\t" 5301 "vmulss $tmp2,$tmp2,$tmp\n\t" 5302 "pshufd $tmp,$tmp3,0x02\n\t" 5303 "vmulss $tmp2,$tmp2,$tmp\n\t" 5304 "pshufd $tmp,$tmp3,0x03\n\t" 5305 "vmulss $tmp2,$tmp2,$tmp\n\t" 5306 "vextractf32x4 $tmp3,$src2, 0x2\n\t" 5307 "vmulss $tmp2,$tmp2,$tmp3\n\t" 5308 "pshufd $tmp,$tmp3,0x01\n\t" 5309 "vmulss $tmp2,$tmp2,$tmp\n\t" 5310 "pshufd $tmp,$tmp3,0x02\n\t" 5311 "vmulss $tmp2,$tmp2,$tmp\n\t" 5312 "pshufd $tmp,$tmp3,0x03\n\t" 5313 "vmulss $tmp2,$tmp2,$tmp\n\t" 5314 "vextractf32x4 $tmp3,$src2, 0x3\n\t" 5315 "vmulss $tmp2,$tmp2,$tmp3\n\t" 5316 "pshufd $tmp,$tmp3,0x01\n\t" 5317 "vmulss $tmp2,$tmp2,$tmp\n\t" 5318 "pshufd $tmp,$tmp3,0x02\n\t" 5319 "vmulss $tmp2,$tmp2,$tmp\n\t" 5320 "pshufd $tmp,$tmp3,0x03\n\t" 5321 "vmulss $dst,$tmp2,$tmp\t! 
mul reduction16F" %} 5322 ins_encode %{ 5323 __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 5324 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5325 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5326 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5327 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5328 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5329 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5330 __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1); 5331 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 5332 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); 5333 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5334 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); 5335 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5336 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); 5337 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5338 __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2); 5339 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 5340 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); 5341 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5342 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); 5343 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5344 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); 5345 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5346 __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3); 5347 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 5348 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); 5349 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5350 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); 5351 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5352 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); 5353 __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5354 %} 5355 ins_pipe( pipe_slow ); 5356 %} 5357 5358 instruct rsmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{ 5359 predicate(UseSSE >= 1 && UseAVX == 0); 5360 match(Set dst (MulReductionVD src1 src2)); 5361 effect(TEMP tmp, TEMP dst); 5362 format %{ "movdqu $tmp,$src1\n\t" 5363 "mulsd $tmp,$src2\n\t" 5364 "pshufd $dst,$src2,0xE\n\t" 5365 "mulsd $dst,$tmp\t! mul reduction2D" %} 5366 ins_encode %{ 5367 __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); 5368 __ mulsd($tmp$$XMMRegister, $src2$$XMMRegister); 5369 __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE); 5370 __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister); 5371 %} 5372 ins_pipe( pipe_slow ); 5373 %} 5374 5375 instruct rvmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{ 5376 predicate(UseAVX > 0); 5377 match(Set dst (MulReductionVD src1 src2)); 5378 effect(TEMP tmp, TEMP tmp2); 5379 format %{ "vmulsd $tmp2,$src1,$src2\n\t" 5380 "pshufd $tmp,$src2,0xE\n\t" 5381 "vmulsd $dst,$tmp2,$tmp\t! 
mul reduction2D" %} 5382 ins_encode %{ 5383 __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 5384 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5385 __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5386 %} 5387 ins_pipe( pipe_slow ); 5388 %} 5389 5390 instruct rvmul4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{ 5391 predicate(UseAVX > 0); 5392 match(Set dst (MulReductionVD src1 src2)); 5393 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5394 format %{ "vmulsd $tmp2,$src1,$src2\n\t" 5395 "pshufd $tmp,$src2,0xE\n\t" 5396 "vmulsd $tmp2,$tmp2,$tmp\n\t" 5397 "vextractf128 $tmp3,$src2\n\t" 5398 "vmulsd $tmp2,$tmp2,$tmp3\n\t" 5399 "pshufd $tmp,$tmp3,0xE\n\t" 5400 "vmulsd $dst,$tmp2,$tmp\t! mul reduction4D" %} 5401 ins_encode %{ 5402 __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 5403 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5404 __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5405 __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister); 5406 __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 5407 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); 5408 __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5409 %} 5410 ins_pipe( pipe_slow ); 5411 %} 5412 5413 instruct rvmul8D_reduction_reg(regD dst, regD src1, vecZ src2, regD tmp, regD tmp2, regD tmp3) %{ 5414 predicate(UseAVX > 2); 5415 match(Set dst (MulReductionVD src1 src2)); 5416 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5417 format %{ "vmulsd $tmp2,$src1,$src2\n\t" 5418 "pshufd $tmp,$src2,0xE\n\t" 5419 "vmulsd $tmp2,$tmp2,$tmp\n\t" 5420 "vextractf64x2 $tmp3,$src2, 0x1\n\t" 5421 "vmulsd $tmp2,$tmp2,$tmp3\n\t" 5422 "pshufd $tmp,$src2,0xE\n\t" 5423 "vmulsd $tmp2,$tmp2,$tmp\n\t" 5424 "vextractf64x2 $tmp3,$src2, 0x2\n\t" 5425 "vmulsd $tmp2,$tmp2,$tmp3\n\t" 5426 "pshufd $tmp,$tmp3,0xE\n\t" 5427 "vmulsd $tmp2,$tmp2,$tmp\n\t" 5428 "vextractf64x2 $tmp3,$src2, 0x3\n\t" 5429 "vmulsd $tmp2,$tmp2,$tmp3\n\t" 5430 "pshufd $tmp,$tmp3,0xE\n\t" 5431 "vmulsd $dst,$tmp2,$tmp\t! 
mul reduction8D" %} 5432 ins_encode %{ 5433 __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 5434 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5435 __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5436 __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1); 5437 __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 5438 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); 5439 __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5440 __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2); 5441 __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 5442 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); 5443 __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5444 __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3); 5445 __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 5446 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); 5447 __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5448 %} 5449 ins_pipe( pipe_slow ); 5450 %} 5451 5452 // ====================VECTOR ARITHMETIC======================================= 5453 5454 // --------------------------------- ADD -------------------------------------- 5455 5456 // Bytes vector add 5457 instruct vadd4B(vecS dst, vecS src) %{ 5458 predicate(n->as_Vector()->length() == 4); 5459 match(Set dst (AddVB dst src)); 5460 format %{ "paddb $dst,$src\t! add packed4B" %} 5461 ins_encode %{ 5462 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5463 %} 5464 ins_pipe( pipe_slow ); 5465 %} 5466 5467 instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{ 5468 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5469 match(Set dst (AddVB src1 src2)); 5470 format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %} 5471 ins_encode %{ 5472 int vector_len = 0; 5473 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5474 %} 5475 ins_pipe( pipe_slow ); 5476 %} 5477 5478 instruct vadd4B_mem(vecS dst, vecS src, memory mem) %{ 5479 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5480 match(Set dst (AddVB src (LoadVector mem))); 5481 format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %} 5482 ins_encode %{ 5483 int vector_len = 0; 5484 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5485 %} 5486 ins_pipe( pipe_slow ); 5487 %} 5488 5489 instruct vadd8B(vecD dst, vecD src) %{ 5490 predicate(n->as_Vector()->length() == 8); 5491 match(Set dst (AddVB dst src)); 5492 format %{ "paddb $dst,$src\t! add packed8B" %} 5493 ins_encode %{ 5494 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5495 %} 5496 ins_pipe( pipe_slow ); 5497 %} 5498 5499 instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{ 5500 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 5501 match(Set dst (AddVB src1 src2)); 5502 format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %} 5503 ins_encode %{ 5504 int vector_len = 0; 5505 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5506 %} 5507 ins_pipe( pipe_slow ); 5508 %} 5509 5510 instruct vadd8B_mem(vecD dst, vecD src, memory mem) %{ 5511 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 5512 match(Set dst (AddVB src (LoadVector mem))); 5513 format %{ "vpaddb $dst,$src,$mem\t! 
add packed8B" %} 5514 ins_encode %{ 5515 int vector_len = 0; 5516 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5517 %} 5518 ins_pipe( pipe_slow ); 5519 %} 5520 5521 instruct vadd16B(vecX dst, vecX src) %{ 5522 predicate(n->as_Vector()->length() == 16); 5523 match(Set dst (AddVB dst src)); 5524 format %{ "paddb $dst,$src\t! add packed16B" %} 5525 ins_encode %{ 5526 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5527 %} 5528 ins_pipe( pipe_slow ); 5529 %} 5530 5531 instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{ 5532 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 5533 match(Set dst (AddVB src1 src2)); 5534 format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %} 5535 ins_encode %{ 5536 int vector_len = 0; 5537 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5538 %} 5539 ins_pipe( pipe_slow ); 5540 %} 5541 5542 instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{ 5543 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 5544 match(Set dst (AddVB src (LoadVector mem))); 5545 format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %} 5546 ins_encode %{ 5547 int vector_len = 0; 5548 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5549 %} 5550 ins_pipe( pipe_slow ); 5551 %} 5552 5553 instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{ 5554 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 5555 match(Set dst (AddVB src1 src2)); 5556 format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %} 5557 ins_encode %{ 5558 int vector_len = 1; 5559 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5560 %} 5561 ins_pipe( pipe_slow ); 5562 %} 5563 5564 instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{ 5565 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 5566 match(Set dst (AddVB src (LoadVector mem))); 5567 format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %} 5568 ins_encode %{ 5569 int vector_len = 1; 5570 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5571 %} 5572 ins_pipe( pipe_slow ); 5573 %} 5574 5575 instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ 5576 predicate(UseAVX > 2 && n->as_Vector()->length() == 64); 5577 match(Set dst (AddVB src1 src2)); 5578 format %{ "vpaddb $dst,$src1,$src2\t! add packed64B" %} 5579 ins_encode %{ 5580 int vector_len = 2; 5581 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5582 %} 5583 ins_pipe( pipe_slow ); 5584 %} 5585 5586 instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{ 5587 predicate(UseAVX > 2 && n->as_Vector()->length() == 64); 5588 match(Set dst (AddVB src (LoadVector mem))); 5589 format %{ "vpaddb $dst,$src,$mem\t! add packed64B" %} 5590 ins_encode %{ 5591 int vector_len = 2; 5592 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5593 %} 5594 ins_pipe( pipe_slow ); 5595 %} 5596 5597 // Shorts/Chars vector add 5598 instruct vadd2S(vecS dst, vecS src) %{ 5599 predicate(n->as_Vector()->length() == 2); 5600 match(Set dst (AddVS dst src)); 5601 format %{ "paddw $dst,$src\t! add packed2S" %} 5602 ins_encode %{ 5603 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5604 %} 5605 ins_pipe( pipe_slow ); 5606 %} 5607 5608 instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{ 5609 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5610 match(Set dst (AddVS src1 src2)); 5611 format %{ "vpaddw $dst,$src1,$src2\t! 
add packed2S" %} 5612 ins_encode %{ 5613 int vector_len = 0; 5614 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5615 %} 5616 ins_pipe( pipe_slow ); 5617 %} 5618 5619 instruct vadd2S_mem(vecS dst, vecS src, memory mem) %{ 5620 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5621 match(Set dst (AddVS src (LoadVector mem))); 5622 format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %} 5623 ins_encode %{ 5624 int vector_len = 0; 5625 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5626 %} 5627 ins_pipe( pipe_slow ); 5628 %} 5629 5630 instruct vadd4S(vecD dst, vecD src) %{ 5631 predicate(n->as_Vector()->length() == 4); 5632 match(Set dst (AddVS dst src)); 5633 format %{ "paddw $dst,$src\t! add packed4S" %} 5634 ins_encode %{ 5635 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5636 %} 5637 ins_pipe( pipe_slow ); 5638 %} 5639 5640 instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{ 5641 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5642 match(Set dst (AddVS src1 src2)); 5643 format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %} 5644 ins_encode %{ 5645 int vector_len = 0; 5646 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5647 %} 5648 ins_pipe( pipe_slow ); 5649 %} 5650 5651 instruct vadd4S_mem(vecD dst, vecD src, memory mem) %{ 5652 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5653 match(Set dst (AddVS src (LoadVector mem))); 5654 format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %} 5655 ins_encode %{ 5656 int vector_len = 0; 5657 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5658 %} 5659 ins_pipe( pipe_slow ); 5660 %} 5661 5662 instruct vadd8S(vecX dst, vecX src) %{ 5663 predicate(n->as_Vector()->length() == 8); 5664 match(Set dst (AddVS dst src)); 5665 format %{ "paddw $dst,$src\t! add packed8S" %} 5666 ins_encode %{ 5667 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5668 %} 5669 ins_pipe( pipe_slow ); 5670 %} 5671 5672 instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{ 5673 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 5674 match(Set dst (AddVS src1 src2)); 5675 format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %} 5676 ins_encode %{ 5677 int vector_len = 0; 5678 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5679 %} 5680 ins_pipe( pipe_slow ); 5681 %} 5682 5683 instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{ 5684 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 5685 match(Set dst (AddVS src (LoadVector mem))); 5686 format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %} 5687 ins_encode %{ 5688 int vector_len = 0; 5689 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5690 %} 5691 ins_pipe( pipe_slow ); 5692 %} 5693 5694 instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{ 5695 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 5696 match(Set dst (AddVS src1 src2)); 5697 format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %} 5698 ins_encode %{ 5699 int vector_len = 1; 5700 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5701 %} 5702 ins_pipe( pipe_slow ); 5703 %} 5704 5705 instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{ 5706 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 5707 match(Set dst (AddVS src (LoadVector mem))); 5708 format %{ "vpaddw $dst,$src,$mem\t! 
add packed16S" %} 5709 ins_encode %{ 5710 int vector_len = 1; 5711 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5712 %} 5713 ins_pipe( pipe_slow ); 5714 %} 5715 5716 instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ 5717 predicate(UseAVX > 2 && n->as_Vector()->length() == 32); 5718 match(Set dst (AddVS src1 src2)); 5719 format %{ "vpaddw $dst,$src1,$src2\t! add packed32S" %} 5720 ins_encode %{ 5721 int vector_len = 2; 5722 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5723 %} 5724 ins_pipe( pipe_slow ); 5725 %} 5726 5727 instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{ 5728 predicate(UseAVX > 2 && n->as_Vector()->length() == 32); 5729 match(Set dst (AddVS src (LoadVector mem))); 5730 format %{ "vpaddw $dst,$src,$mem\t! add packed32S" %} 5731 ins_encode %{ 5732 int vector_len = 2; 5733 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5734 %} 5735 ins_pipe( pipe_slow ); 5736 %} 5737 5738 // Integers vector add 5739 instruct vadd2I(vecD dst, vecD src) %{ 5740 predicate(n->as_Vector()->length() == 2); 5741 match(Set dst (AddVI dst src)); 5742 format %{ "paddd $dst,$src\t! add packed2I" %} 5743 ins_encode %{ 5744 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 5745 %} 5746 ins_pipe( pipe_slow ); 5747 %} 5748 5749 instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{ 5750 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5751 match(Set dst (AddVI src1 src2)); 5752 format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %} 5753 ins_encode %{ 5754 int vector_len = 0; 5755 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5756 %} 5757 ins_pipe( pipe_slow ); 5758 %} 5759 5760 instruct vadd2I_mem(vecD dst, vecD src, memory mem) %{ 5761 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5762 match(Set dst (AddVI src (LoadVector mem))); 5763 format %{ "vpaddd $dst,$src,$mem\t! add packed2I" %} 5764 ins_encode %{ 5765 int vector_len = 0; 5766 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5767 %} 5768 ins_pipe( pipe_slow ); 5769 %} 5770 5771 instruct vadd4I(vecX dst, vecX src) %{ 5772 predicate(n->as_Vector()->length() == 4); 5773 match(Set dst (AddVI dst src)); 5774 format %{ "paddd $dst,$src\t! add packed4I" %} 5775 ins_encode %{ 5776 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 5777 %} 5778 ins_pipe( pipe_slow ); 5779 %} 5780 5781 instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{ 5782 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5783 match(Set dst (AddVI src1 src2)); 5784 format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %} 5785 ins_encode %{ 5786 int vector_len = 0; 5787 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5788 %} 5789 ins_pipe( pipe_slow ); 5790 %} 5791 5792 instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{ 5793 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5794 match(Set dst (AddVI src (LoadVector mem))); 5795 format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %} 5796 ins_encode %{ 5797 int vector_len = 0; 5798 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5799 %} 5800 ins_pipe( pipe_slow ); 5801 %} 5802 5803 instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{ 5804 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 5805 match(Set dst (AddVI src1 src2)); 5806 format %{ "vpaddd $dst,$src1,$src2\t! 
add packed8I" %} 5807 ins_encode %{ 5808 int vector_len = 1; 5809 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5810 %} 5811 ins_pipe( pipe_slow ); 5812 %} 5813 5814 instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{ 5815 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 5816 match(Set dst (AddVI src (LoadVector mem))); 5817 format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %} 5818 ins_encode %{ 5819 int vector_len = 1; 5820 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5821 %} 5822 ins_pipe( pipe_slow ); 5823 %} 5824 5825 instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ 5826 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 5827 match(Set dst (AddVI src1 src2)); 5828 format %{ "vpaddd $dst,$src1,$src2\t! add packed16I" %} 5829 ins_encode %{ 5830 int vector_len = 2; 5831 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5832 %} 5833 ins_pipe( pipe_slow ); 5834 %} 5835 5836 instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{ 5837 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 5838 match(Set dst (AddVI src (LoadVector mem))); 5839 format %{ "vpaddd $dst,$src,$mem\t! add packed16I" %} 5840 ins_encode %{ 5841 int vector_len = 2; 5842 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5843 %} 5844 ins_pipe( pipe_slow ); 5845 %} 5846 5847 // Longs vector add 5848 instruct vadd2L(vecX dst, vecX src) %{ 5849 predicate(n->as_Vector()->length() == 2); 5850 match(Set dst (AddVL dst src)); 5851 format %{ "paddq $dst,$src\t! add packed2L" %} 5852 ins_encode %{ 5853 __ paddq($dst$$XMMRegister, $src$$XMMRegister); 5854 %} 5855 ins_pipe( pipe_slow ); 5856 %} 5857 5858 instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{ 5859 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5860 match(Set dst (AddVL src1 src2)); 5861 format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %} 5862 ins_encode %{ 5863 int vector_len = 0; 5864 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5865 %} 5866 ins_pipe( pipe_slow ); 5867 %} 5868 5869 instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{ 5870 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5871 match(Set dst (AddVL src (LoadVector mem))); 5872 format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %} 5873 ins_encode %{ 5874 int vector_len = 0; 5875 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5876 %} 5877 ins_pipe( pipe_slow ); 5878 %} 5879 5880 instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{ 5881 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 5882 match(Set dst (AddVL src1 src2)); 5883 format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %} 5884 ins_encode %{ 5885 int vector_len = 1; 5886 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5887 %} 5888 ins_pipe( pipe_slow ); 5889 %} 5890 5891 instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{ 5892 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 5893 match(Set dst (AddVL src (LoadVector mem))); 5894 format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %} 5895 ins_encode %{ 5896 int vector_len = 1; 5897 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5898 %} 5899 ins_pipe( pipe_slow ); 5900 %} 5901 5902 instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ 5903 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 5904 match(Set dst (AddVL src1 src2)); 5905 format %{ "vpaddq $dst,$src1,$src2\t! 
add packed8L" %} 5906 ins_encode %{ 5907 int vector_len = 2; 5908 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5909 %} 5910 ins_pipe( pipe_slow ); 5911 %} 5912 5913 instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{ 5914 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 5915 match(Set dst (AddVL src (LoadVector mem))); 5916 format %{ "vpaddq $dst,$src,$mem\t! add packed8L" %} 5917 ins_encode %{ 5918 int vector_len = 2; 5919 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5920 %} 5921 ins_pipe( pipe_slow ); 5922 %} 5923 5924 // Floats vector add 5925 instruct vadd2F(vecD dst, vecD src) %{ 5926 predicate(n->as_Vector()->length() == 2); 5927 match(Set dst (AddVF dst src)); 5928 format %{ "addps $dst,$src\t! add packed2F" %} 5929 ins_encode %{ 5930 __ addps($dst$$XMMRegister, $src$$XMMRegister); 5931 %} 5932 ins_pipe( pipe_slow ); 5933 %} 5934 5935 instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{ 5936 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5937 match(Set dst (AddVF src1 src2)); 5938 format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %} 5939 ins_encode %{ 5940 int vector_len = 0; 5941 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5942 %} 5943 ins_pipe( pipe_slow ); 5944 %} 5945 5946 instruct vadd2F_mem(vecD dst, vecD src, memory mem) %{ 5947 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5948 match(Set dst (AddVF src (LoadVector mem))); 5949 format %{ "vaddps $dst,$src,$mem\t! add packed2F" %} 5950 ins_encode %{ 5951 int vector_len = 0; 5952 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5953 %} 5954 ins_pipe( pipe_slow ); 5955 %} 5956 5957 instruct vadd4F(vecX dst, vecX src) %{ 5958 predicate(n->as_Vector()->length() == 4); 5959 match(Set dst (AddVF dst src)); 5960 format %{ "addps $dst,$src\t! add packed4F" %} 5961 ins_encode %{ 5962 __ addps($dst$$XMMRegister, $src$$XMMRegister); 5963 %} 5964 ins_pipe( pipe_slow ); 5965 %} 5966 5967 instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{ 5968 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5969 match(Set dst (AddVF src1 src2)); 5970 format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %} 5971 ins_encode %{ 5972 int vector_len = 0; 5973 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5974 %} 5975 ins_pipe( pipe_slow ); 5976 %} 5977 5978 instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{ 5979 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5980 match(Set dst (AddVF src (LoadVector mem))); 5981 format %{ "vaddps $dst,$src,$mem\t! add packed4F" %} 5982 ins_encode %{ 5983 int vector_len = 0; 5984 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5985 %} 5986 ins_pipe( pipe_slow ); 5987 %} 5988 5989 instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{ 5990 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 5991 match(Set dst (AddVF src1 src2)); 5992 format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %} 5993 ins_encode %{ 5994 int vector_len = 1; 5995 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5996 %} 5997 ins_pipe( pipe_slow ); 5998 %} 5999 6000 instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{ 6001 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6002 match(Set dst (AddVF src (LoadVector mem))); 6003 format %{ "vaddps $dst,$src,$mem\t! 
add packed8F" %} 6004 ins_encode %{ 6005 int vector_len = 1; 6006 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6007 %} 6008 ins_pipe( pipe_slow ); 6009 %} 6010 6011 instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6012 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 6013 match(Set dst (AddVF src1 src2)); 6014 format %{ "vaddps $dst,$src1,$src2\t! add packed16F" %} 6015 ins_encode %{ 6016 int vector_len = 2; 6017 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6018 %} 6019 ins_pipe( pipe_slow ); 6020 %} 6021 6022 instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{ 6023 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 6024 match(Set dst (AddVF src (LoadVector mem))); 6025 format %{ "vaddps $dst,$src,$mem\t! add packed16F" %} 6026 ins_encode %{ 6027 int vector_len = 2; 6028 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6029 %} 6030 ins_pipe( pipe_slow ); 6031 %} 6032 6033 // Doubles vector add 6034 instruct vadd2D(vecX dst, vecX src) %{ 6035 predicate(n->as_Vector()->length() == 2); 6036 match(Set dst (AddVD dst src)); 6037 format %{ "addpd $dst,$src\t! add packed2D" %} 6038 ins_encode %{ 6039 __ addpd($dst$$XMMRegister, $src$$XMMRegister); 6040 %} 6041 ins_pipe( pipe_slow ); 6042 %} 6043 6044 instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{ 6045 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6046 match(Set dst (AddVD src1 src2)); 6047 format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %} 6048 ins_encode %{ 6049 int vector_len = 0; 6050 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6051 %} 6052 ins_pipe( pipe_slow ); 6053 %} 6054 6055 instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{ 6056 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6057 match(Set dst (AddVD src (LoadVector mem))); 6058 format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %} 6059 ins_encode %{ 6060 int vector_len = 0; 6061 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6062 %} 6063 ins_pipe( pipe_slow ); 6064 %} 6065 6066 instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{ 6067 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6068 match(Set dst (AddVD src1 src2)); 6069 format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %} 6070 ins_encode %{ 6071 int vector_len = 1; 6072 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6073 %} 6074 ins_pipe( pipe_slow ); 6075 %} 6076 6077 instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{ 6078 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6079 match(Set dst (AddVD src (LoadVector mem))); 6080 format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %} 6081 ins_encode %{ 6082 int vector_len = 1; 6083 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6084 %} 6085 ins_pipe( pipe_slow ); 6086 %} 6087 6088 instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6089 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 6090 match(Set dst (AddVD src1 src2)); 6091 format %{ "vaddpd $dst,$src1,$src2\t! add packed8D" %} 6092 ins_encode %{ 6093 int vector_len = 2; 6094 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6095 %} 6096 ins_pipe( pipe_slow ); 6097 %} 6098 6099 instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{ 6100 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 6101 match(Set dst (AddVD src (LoadVector mem))); 6102 format %{ "vaddpd $dst,$src,$mem\t! 
add packed8D" %} 6103 ins_encode %{ 6104 int vector_len = 2; 6105 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6106 %} 6107 ins_pipe( pipe_slow ); 6108 %} 6109 6110 // --------------------------------- SUB -------------------------------------- 6111 6112 // Bytes vector sub 6113 instruct vsub4B(vecS dst, vecS src) %{ 6114 predicate(n->as_Vector()->length() == 4); 6115 match(Set dst (SubVB dst src)); 6116 format %{ "psubb $dst,$src\t! sub packed4B" %} 6117 ins_encode %{ 6118 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 6119 %} 6120 ins_pipe( pipe_slow ); 6121 %} 6122 6123 instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{ 6124 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6125 match(Set dst (SubVB src1 src2)); 6126 format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %} 6127 ins_encode %{ 6128 int vector_len = 0; 6129 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6130 %} 6131 ins_pipe( pipe_slow ); 6132 %} 6133 6134 instruct vsub4B_mem(vecS dst, vecS src, memory mem) %{ 6135 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6136 match(Set dst (SubVB src (LoadVector mem))); 6137 format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %} 6138 ins_encode %{ 6139 int vector_len = 0; 6140 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6141 %} 6142 ins_pipe( pipe_slow ); 6143 %} 6144 6145 instruct vsub8B(vecD dst, vecD src) %{ 6146 predicate(n->as_Vector()->length() == 8); 6147 match(Set dst (SubVB dst src)); 6148 format %{ "psubb $dst,$src\t! sub packed8B" %} 6149 ins_encode %{ 6150 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 6151 %} 6152 ins_pipe( pipe_slow ); 6153 %} 6154 6155 instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{ 6156 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6157 match(Set dst (SubVB src1 src2)); 6158 format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %} 6159 ins_encode %{ 6160 int vector_len = 0; 6161 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6162 %} 6163 ins_pipe( pipe_slow ); 6164 %} 6165 6166 instruct vsub8B_mem(vecD dst, vecD src, memory mem) %{ 6167 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6168 match(Set dst (SubVB src (LoadVector mem))); 6169 format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %} 6170 ins_encode %{ 6171 int vector_len = 0; 6172 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6173 %} 6174 ins_pipe( pipe_slow ); 6175 %} 6176 6177 instruct vsub16B(vecX dst, vecX src) %{ 6178 predicate(n->as_Vector()->length() == 16); 6179 match(Set dst (SubVB dst src)); 6180 format %{ "psubb $dst,$src\t! sub packed16B" %} 6181 ins_encode %{ 6182 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 6183 %} 6184 ins_pipe( pipe_slow ); 6185 %} 6186 6187 instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{ 6188 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 6189 match(Set dst (SubVB src1 src2)); 6190 format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %} 6191 ins_encode %{ 6192 int vector_len = 0; 6193 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6194 %} 6195 ins_pipe( pipe_slow ); 6196 %} 6197 6198 instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{ 6199 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 6200 match(Set dst (SubVB src (LoadVector mem))); 6201 format %{ "vpsubb $dst,$src,$mem\t! 
sub packed16B" %} 6202 ins_encode %{ 6203 int vector_len = 0; 6204 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6205 %} 6206 ins_pipe( pipe_slow ); 6207 %} 6208 6209 instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{ 6210 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 6211 match(Set dst (SubVB src1 src2)); 6212 format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %} 6213 ins_encode %{ 6214 int vector_len = 1; 6215 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6216 %} 6217 ins_pipe( pipe_slow ); 6218 %} 6219 6220 instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{ 6221 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 6222 match(Set dst (SubVB src (LoadVector mem))); 6223 format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %} 6224 ins_encode %{ 6225 int vector_len = 1; 6226 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6227 %} 6228 ins_pipe( pipe_slow ); 6229 %} 6230 6231 instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6232 predicate(UseAVX > 2 && n->as_Vector()->length() == 64); 6233 match(Set dst (SubVB src1 src2)); 6234 format %{ "vpsubb $dst,$src1,$src2\t! sub packed64B" %} 6235 ins_encode %{ 6236 int vector_len = 2; 6237 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6238 %} 6239 ins_pipe( pipe_slow ); 6240 %} 6241 6242 instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{ 6243 predicate(UseAVX > 2 && n->as_Vector()->length() == 64); 6244 match(Set dst (SubVB src (LoadVector mem))); 6245 format %{ "vpsubb $dst,$src,$mem\t! sub packed64B" %} 6246 ins_encode %{ 6247 int vector_len = 2; 6248 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6249 %} 6250 ins_pipe( pipe_slow ); 6251 %} 6252 6253 // Shorts/Chars vector sub 6254 instruct vsub2S(vecS dst, vecS src) %{ 6255 predicate(n->as_Vector()->length() == 2); 6256 match(Set dst (SubVS dst src)); 6257 format %{ "psubw $dst,$src\t! sub packed2S" %} 6258 ins_encode %{ 6259 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 6260 %} 6261 ins_pipe( pipe_slow ); 6262 %} 6263 6264 instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{ 6265 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6266 match(Set dst (SubVS src1 src2)); 6267 format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %} 6268 ins_encode %{ 6269 int vector_len = 0; 6270 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6271 %} 6272 ins_pipe( pipe_slow ); 6273 %} 6274 6275 instruct vsub2S_mem(vecS dst, vecS src, memory mem) %{ 6276 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6277 match(Set dst (SubVS src (LoadVector mem))); 6278 format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %} 6279 ins_encode %{ 6280 int vector_len = 0; 6281 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6282 %} 6283 ins_pipe( pipe_slow ); 6284 %} 6285 6286 instruct vsub4S(vecD dst, vecD src) %{ 6287 predicate(n->as_Vector()->length() == 4); 6288 match(Set dst (SubVS dst src)); 6289 format %{ "psubw $dst,$src\t! sub packed4S" %} 6290 ins_encode %{ 6291 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 6292 %} 6293 ins_pipe( pipe_slow ); 6294 %} 6295 6296 instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{ 6297 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6298 match(Set dst (SubVS src1 src2)); 6299 format %{ "vpsubw $dst,$src1,$src2\t! 
sub packed4S" %} 6300 ins_encode %{ 6301 int vector_len = 0; 6302 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6303 %} 6304 ins_pipe( pipe_slow ); 6305 %} 6306 6307 instruct vsub4S_mem(vecD dst, vecD src, memory mem) %{ 6308 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6309 match(Set dst (SubVS src (LoadVector mem))); 6310 format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %} 6311 ins_encode %{ 6312 int vector_len = 0; 6313 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6314 %} 6315 ins_pipe( pipe_slow ); 6316 %} 6317 6318 instruct vsub8S(vecX dst, vecX src) %{ 6319 predicate(n->as_Vector()->length() == 8); 6320 match(Set dst (SubVS dst src)); 6321 format %{ "psubw $dst,$src\t! sub packed8S" %} 6322 ins_encode %{ 6323 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 6324 %} 6325 ins_pipe( pipe_slow ); 6326 %} 6327 6328 instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{ 6329 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6330 match(Set dst (SubVS src1 src2)); 6331 format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} 6332 ins_encode %{ 6333 int vector_len = 0; 6334 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6335 %} 6336 ins_pipe( pipe_slow ); 6337 %} 6338 6339 instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{ 6340 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6341 match(Set dst (SubVS src (LoadVector mem))); 6342 format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %} 6343 ins_encode %{ 6344 int vector_len = 0; 6345 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6346 %} 6347 ins_pipe( pipe_slow ); 6348 %} 6349 6350 instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{ 6351 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 6352 match(Set dst (SubVS src1 src2)); 6353 format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} 6354 ins_encode %{ 6355 int vector_len = 1; 6356 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6357 %} 6358 ins_pipe( pipe_slow ); 6359 %} 6360 6361 instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{ 6362 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 6363 match(Set dst (SubVS src (LoadVector mem))); 6364 format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %} 6365 ins_encode %{ 6366 int vector_len = 1; 6367 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6368 %} 6369 ins_pipe( pipe_slow ); 6370 %} 6371 6372 instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6373 predicate(UseAVX > 2 && n->as_Vector()->length() == 32); 6374 match(Set dst (SubVS src1 src2)); 6375 format %{ "vpsubw $dst,$src1,$src2\t! sub packed32S" %} 6376 ins_encode %{ 6377 int vector_len = 2; 6378 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6379 %} 6380 ins_pipe( pipe_slow ); 6381 %} 6382 6383 instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{ 6384 predicate(UseAVX > 2 && n->as_Vector()->length() == 32); 6385 match(Set dst (SubVS src (LoadVector mem))); 6386 format %{ "vpsubw $dst,$src,$mem\t! sub packed32S" %} 6387 ins_encode %{ 6388 int vector_len = 2; 6389 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6390 %} 6391 ins_pipe( pipe_slow ); 6392 %} 6393 6394 // Integers vector sub 6395 instruct vsub2I(vecD dst, vecD src) %{ 6396 predicate(n->as_Vector()->length() == 2); 6397 match(Set dst (SubVI dst src)); 6398 format %{ "psubd $dst,$src\t! 
sub packed2I" %} 6399 ins_encode %{ 6400 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 6401 %} 6402 ins_pipe( pipe_slow ); 6403 %} 6404 6405 instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{ 6406 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6407 match(Set dst (SubVI src1 src2)); 6408 format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %} 6409 ins_encode %{ 6410 int vector_len = 0; 6411 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6412 %} 6413 ins_pipe( pipe_slow ); 6414 %} 6415 6416 instruct vsub2I_mem(vecD dst, vecD src, memory mem) %{ 6417 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6418 match(Set dst (SubVI src (LoadVector mem))); 6419 format %{ "vpsubd $dst,$src,$mem\t! sub packed2I" %} 6420 ins_encode %{ 6421 int vector_len = 0; 6422 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6423 %} 6424 ins_pipe( pipe_slow ); 6425 %} 6426 6427 instruct vsub4I(vecX dst, vecX src) %{ 6428 predicate(n->as_Vector()->length() == 4); 6429 match(Set dst (SubVI dst src)); 6430 format %{ "psubd $dst,$src\t! sub packed4I" %} 6431 ins_encode %{ 6432 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 6433 %} 6434 ins_pipe( pipe_slow ); 6435 %} 6436 6437 instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{ 6438 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6439 match(Set dst (SubVI src1 src2)); 6440 format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %} 6441 ins_encode %{ 6442 int vector_len = 0; 6443 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6444 %} 6445 ins_pipe( pipe_slow ); 6446 %} 6447 6448 instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{ 6449 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6450 match(Set dst (SubVI src (LoadVector mem))); 6451 format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %} 6452 ins_encode %{ 6453 int vector_len = 0; 6454 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6455 %} 6456 ins_pipe( pipe_slow ); 6457 %} 6458 6459 instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{ 6460 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 6461 match(Set dst (SubVI src1 src2)); 6462 format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %} 6463 ins_encode %{ 6464 int vector_len = 1; 6465 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6466 %} 6467 ins_pipe( pipe_slow ); 6468 %} 6469 6470 instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{ 6471 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 6472 match(Set dst (SubVI src (LoadVector mem))); 6473 format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %} 6474 ins_encode %{ 6475 int vector_len = 1; 6476 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6477 %} 6478 ins_pipe( pipe_slow ); 6479 %} 6480 6481 instruct vsub16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6482 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 6483 match(Set dst (SubVI src1 src2)); 6484 format %{ "vpsubd $dst,$src1,$src2\t! sub packed16I" %} 6485 ins_encode %{ 6486 int vector_len = 2; 6487 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6488 %} 6489 ins_pipe( pipe_slow ); 6490 %} 6491 6492 instruct vsub16I_mem(vecZ dst, vecZ src, memory mem) %{ 6493 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 6494 match(Set dst (SubVI src (LoadVector mem))); 6495 format %{ "vpsubd $dst,$src,$mem\t! 
sub packed16I" %} 6496 ins_encode %{ 6497 int vector_len = 2; 6498 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6499 %} 6500 ins_pipe( pipe_slow ); 6501 %} 6502 6503 // Longs vector sub 6504 instruct vsub2L(vecX dst, vecX src) %{ 6505 predicate(n->as_Vector()->length() == 2); 6506 match(Set dst (SubVL dst src)); 6507 format %{ "psubq $dst,$src\t! sub packed2L" %} 6508 ins_encode %{ 6509 __ psubq($dst$$XMMRegister, $src$$XMMRegister); 6510 %} 6511 ins_pipe( pipe_slow ); 6512 %} 6513 6514 instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{ 6515 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6516 match(Set dst (SubVL src1 src2)); 6517 format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %} 6518 ins_encode %{ 6519 int vector_len = 0; 6520 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6521 %} 6522 ins_pipe( pipe_slow ); 6523 %} 6524 6525 instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{ 6526 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6527 match(Set dst (SubVL src (LoadVector mem))); 6528 format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %} 6529 ins_encode %{ 6530 int vector_len = 0; 6531 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6532 %} 6533 ins_pipe( pipe_slow ); 6534 %} 6535 6536 instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{ 6537 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 6538 match(Set dst (SubVL src1 src2)); 6539 format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %} 6540 ins_encode %{ 6541 int vector_len = 1; 6542 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6543 %} 6544 ins_pipe( pipe_slow ); 6545 %} 6546 6547 instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{ 6548 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 6549 match(Set dst (SubVL src (LoadVector mem))); 6550 format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %} 6551 ins_encode %{ 6552 int vector_len = 1; 6553 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6554 %} 6555 ins_pipe( pipe_slow ); 6556 %} 6557 6558 instruct vsub8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6559 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 6560 match(Set dst (SubVL src1 src2)); 6561 format %{ "vpsubq $dst,$src1,$src2\t! sub packed8L" %} 6562 ins_encode %{ 6563 int vector_len = 2; 6564 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6565 %} 6566 ins_pipe( pipe_slow ); 6567 %} 6568 6569 instruct vsub8L_mem(vecZ dst, vecZ src, memory mem) %{ 6570 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 6571 match(Set dst (SubVL src (LoadVector mem))); 6572 format %{ "vpsubq $dst,$src,$mem\t! sub packed8L" %} 6573 ins_encode %{ 6574 int vector_len = 2; 6575 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6576 %} 6577 ins_pipe( pipe_slow ); 6578 %} 6579 6580 // Floats vector sub 6581 instruct vsub2F(vecD dst, vecD src) %{ 6582 predicate(n->as_Vector()->length() == 2); 6583 match(Set dst (SubVF dst src)); 6584 format %{ "subps $dst,$src\t! sub packed2F" %} 6585 ins_encode %{ 6586 __ subps($dst$$XMMRegister, $src$$XMMRegister); 6587 %} 6588 ins_pipe( pipe_slow ); 6589 %} 6590 6591 instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{ 6592 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6593 match(Set dst (SubVF src1 src2)); 6594 format %{ "vsubps $dst,$src1,$src2\t! 
sub packed2F" %} 6595 ins_encode %{ 6596 int vector_len = 0; 6597 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6598 %} 6599 ins_pipe( pipe_slow ); 6600 %} 6601 6602 instruct vsub2F_mem(vecD dst, vecD src, memory mem) %{ 6603 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6604 match(Set dst (SubVF src (LoadVector mem))); 6605 format %{ "vsubps $dst,$src,$mem\t! sub packed2F" %} 6606 ins_encode %{ 6607 int vector_len = 0; 6608 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6609 %} 6610 ins_pipe( pipe_slow ); 6611 %} 6612 6613 instruct vsub4F(vecX dst, vecX src) %{ 6614 predicate(n->as_Vector()->length() == 4); 6615 match(Set dst (SubVF dst src)); 6616 format %{ "subps $dst,$src\t! sub packed4F" %} 6617 ins_encode %{ 6618 __ subps($dst$$XMMRegister, $src$$XMMRegister); 6619 %} 6620 ins_pipe( pipe_slow ); 6621 %} 6622 6623 instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{ 6624 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6625 match(Set dst (SubVF src1 src2)); 6626 format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %} 6627 ins_encode %{ 6628 int vector_len = 0; 6629 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6630 %} 6631 ins_pipe( pipe_slow ); 6632 %} 6633 6634 instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{ 6635 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6636 match(Set dst (SubVF src (LoadVector mem))); 6637 format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %} 6638 ins_encode %{ 6639 int vector_len = 0; 6640 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6641 %} 6642 ins_pipe( pipe_slow ); 6643 %} 6644 6645 instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{ 6646 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6647 match(Set dst (SubVF src1 src2)); 6648 format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %} 6649 ins_encode %{ 6650 int vector_len = 1; 6651 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6652 %} 6653 ins_pipe( pipe_slow ); 6654 %} 6655 6656 instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{ 6657 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6658 match(Set dst (SubVF src (LoadVector mem))); 6659 format %{ "vsubps $dst,$src,$mem\t! sub packed8F" %} 6660 ins_encode %{ 6661 int vector_len = 1; 6662 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6663 %} 6664 ins_pipe( pipe_slow ); 6665 %} 6666 6667 instruct vsub16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6668 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 6669 match(Set dst (SubVF src1 src2)); 6670 format %{ "vsubps $dst,$src1,$src2\t! sub packed16F" %} 6671 ins_encode %{ 6672 int vector_len = 2; 6673 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6674 %} 6675 ins_pipe( pipe_slow ); 6676 %} 6677 6678 instruct vsub16F_mem(vecZ dst, vecZ src, memory mem) %{ 6679 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 6680 match(Set dst (SubVF src (LoadVector mem))); 6681 format %{ "vsubps $dst,$src,$mem\t! sub packed16F" %} 6682 ins_encode %{ 6683 int vector_len = 2; 6684 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6685 %} 6686 ins_pipe( pipe_slow ); 6687 %} 6688 6689 // Doubles vector sub 6690 instruct vsub2D(vecX dst, vecX src) %{ 6691 predicate(n->as_Vector()->length() == 2); 6692 match(Set dst (SubVD dst src)); 6693 format %{ "subpd $dst,$src\t! 
sub packed2D" %} 6694 ins_encode %{ 6695 __ subpd($dst$$XMMRegister, $src$$XMMRegister); 6696 %} 6697 ins_pipe( pipe_slow ); 6698 %} 6699 6700 instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{ 6701 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6702 match(Set dst (SubVD src1 src2)); 6703 format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %} 6704 ins_encode %{ 6705 int vector_len = 0; 6706 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6707 %} 6708 ins_pipe( pipe_slow ); 6709 %} 6710 6711 instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{ 6712 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6713 match(Set dst (SubVD src (LoadVector mem))); 6714 format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %} 6715 ins_encode %{ 6716 int vector_len = 0; 6717 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6718 %} 6719 ins_pipe( pipe_slow ); 6720 %} 6721 6722 instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{ 6723 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6724 match(Set dst (SubVD src1 src2)); 6725 format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %} 6726 ins_encode %{ 6727 int vector_len = 1; 6728 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6729 %} 6730 ins_pipe( pipe_slow ); 6731 %} 6732 6733 instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{ 6734 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6735 match(Set dst (SubVD src (LoadVector mem))); 6736 format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %} 6737 ins_encode %{ 6738 int vector_len = 1; 6739 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6740 %} 6741 ins_pipe( pipe_slow ); 6742 %} 6743 6744 instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6745 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 6746 match(Set dst (SubVD src1 src2)); 6747 format %{ "vsubpd $dst,$src1,$src2\t! sub packed8D" %} 6748 ins_encode %{ 6749 int vector_len = 2; 6750 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6751 %} 6752 ins_pipe( pipe_slow ); 6753 %} 6754 6755 instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{ 6756 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 6757 match(Set dst (SubVD src (LoadVector mem))); 6758 format %{ "vsubpd $dst,$src,$mem\t! sub packed8D" %} 6759 ins_encode %{ 6760 int vector_len = 2; 6761 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6762 %} 6763 ins_pipe( pipe_slow ); 6764 %} 6765 6766 // --------------------------------- MUL -------------------------------------- 6767 6768 // Shorts/Chars vector mul 6769 instruct vmul2S(vecS dst, vecS src) %{ 6770 predicate(n->as_Vector()->length() == 2); 6771 match(Set dst (MulVS dst src)); 6772 format %{ "pmullw $dst,$src\t! mul packed2S" %} 6773 ins_encode %{ 6774 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 6775 %} 6776 ins_pipe( pipe_slow ); 6777 %} 6778 6779 instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{ 6780 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6781 match(Set dst (MulVS src1 src2)); 6782 format %{ "vpmullw $dst,$src1,$src2\t! 
mul packed2S" %} 6783 ins_encode %{ 6784 int vector_len = 0; 6785 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6786 %} 6787 ins_pipe( pipe_slow ); 6788 %} 6789 6790 instruct vmul2S_mem(vecS dst, vecS src, memory mem) %{ 6791 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6792 match(Set dst (MulVS src (LoadVector mem))); 6793 format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} 6794 ins_encode %{ 6795 int vector_len = 0; 6796 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6797 %} 6798 ins_pipe( pipe_slow ); 6799 %} 6800 6801 instruct vmul4S(vecD dst, vecD src) %{ 6802 predicate(n->as_Vector()->length() == 4); 6803 match(Set dst (MulVS dst src)); 6804 format %{ "pmullw $dst,$src\t! mul packed4S" %} 6805 ins_encode %{ 6806 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 6807 %} 6808 ins_pipe( pipe_slow ); 6809 %} 6810 6811 instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{ 6812 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6813 match(Set dst (MulVS src1 src2)); 6814 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 6815 ins_encode %{ 6816 int vector_len = 0; 6817 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6818 %} 6819 ins_pipe( pipe_slow ); 6820 %} 6821 6822 instruct vmul4S_mem(vecD dst, vecD src, memory mem) %{ 6823 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6824 match(Set dst (MulVS src (LoadVector mem))); 6825 format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} 6826 ins_encode %{ 6827 int vector_len = 0; 6828 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6829 %} 6830 ins_pipe( pipe_slow ); 6831 %} 6832 6833 instruct vmul8S(vecX dst, vecX src) %{ 6834 predicate(n->as_Vector()->length() == 8); 6835 match(Set dst (MulVS dst src)); 6836 format %{ "pmullw $dst,$src\t! mul packed8S" %} 6837 ins_encode %{ 6838 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 6839 %} 6840 ins_pipe( pipe_slow ); 6841 %} 6842 6843 instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{ 6844 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6845 match(Set dst (MulVS src1 src2)); 6846 format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} 6847 ins_encode %{ 6848 int vector_len = 0; 6849 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6850 %} 6851 ins_pipe( pipe_slow ); 6852 %} 6853 6854 instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{ 6855 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6856 match(Set dst (MulVS src (LoadVector mem))); 6857 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} 6858 ins_encode %{ 6859 int vector_len = 0; 6860 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6861 %} 6862 ins_pipe( pipe_slow ); 6863 %} 6864 6865 instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{ 6866 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 6867 match(Set dst (MulVS src1 src2)); 6868 format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} 6869 ins_encode %{ 6870 int vector_len = 1; 6871 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6872 %} 6873 ins_pipe( pipe_slow ); 6874 %} 6875 6876 instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{ 6877 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 6878 match(Set dst (MulVS src (LoadVector mem))); 6879 format %{ "vpmullw $dst,$src,$mem\t! 
mul packed16S" %} 6880 ins_encode %{ 6881 int vector_len = 1; 6882 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6883 %} 6884 ins_pipe( pipe_slow ); 6885 %} 6886 6887 instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6888 predicate(UseAVX > 2 && n->as_Vector()->length() == 32); 6889 match(Set dst (MulVS src1 src2)); 6890 format %{ "vpmullw $dst,$src1,$src2\t! mul packed32S" %} 6891 ins_encode %{ 6892 int vector_len = 2; 6893 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6894 %} 6895 ins_pipe( pipe_slow ); 6896 %} 6897 6898 instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{ 6899 predicate(UseAVX > 2 && n->as_Vector()->length() == 32); 6900 match(Set dst (MulVS src (LoadVector mem))); 6901 format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %} 6902 ins_encode %{ 6903 int vector_len = 2; 6904 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6905 %} 6906 ins_pipe( pipe_slow ); 6907 %} 6908 6909 // Integers vector mul (sse4_1) 6910 instruct vmul2I(vecD dst, vecD src) %{ 6911 predicate(UseSSE > 3 && n->as_Vector()->length() == 2); 6912 match(Set dst (MulVI dst src)); 6913 format %{ "pmulld $dst,$src\t! mul packed2I" %} 6914 ins_encode %{ 6915 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 6916 %} 6917 ins_pipe( pipe_slow ); 6918 %} 6919 6920 instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{ 6921 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6922 match(Set dst (MulVI src1 src2)); 6923 format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %} 6924 ins_encode %{ 6925 int vector_len = 0; 6926 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6927 %} 6928 ins_pipe( pipe_slow ); 6929 %} 6930 6931 instruct vmul2I_mem(vecD dst, vecD src, memory mem) %{ 6932 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6933 match(Set dst (MulVI src (LoadVector mem))); 6934 format %{ "vpmulld $dst,$src,$mem\t! mul packed2I" %} 6935 ins_encode %{ 6936 int vector_len = 0; 6937 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6938 %} 6939 ins_pipe( pipe_slow ); 6940 %} 6941 6942 instruct vmul4I(vecX dst, vecX src) %{ 6943 predicate(UseSSE > 3 && n->as_Vector()->length() == 4); 6944 match(Set dst (MulVI dst src)); 6945 format %{ "pmulld $dst,$src\t! mul packed4I" %} 6946 ins_encode %{ 6947 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 6948 %} 6949 ins_pipe( pipe_slow ); 6950 %} 6951 6952 instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{ 6953 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6954 match(Set dst (MulVI src1 src2)); 6955 format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %} 6956 ins_encode %{ 6957 int vector_len = 0; 6958 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6959 %} 6960 ins_pipe( pipe_slow ); 6961 %} 6962 6963 instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{ 6964 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6965 match(Set dst (MulVI src (LoadVector mem))); 6966 format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %} 6967 ins_encode %{ 6968 int vector_len = 0; 6969 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6970 %} 6971 ins_pipe( pipe_slow ); 6972 %} 6973 6974 instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{ 6975 predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq()); 6976 match(Set dst (MulVL src1 src2)); 6977 format %{ "vpmullq $dst,$src1,$src2\t! 
mul packed2L" %} 6978 ins_encode %{ 6979 int vector_len = 0; 6980 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6981 %} 6982 ins_pipe( pipe_slow ); 6983 %} 6984 6985 instruct vmul2L_mem(vecX dst, vecX src, memory mem) %{ 6986 predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq()); 6987 match(Set dst (MulVL src (LoadVector mem))); 6988 format %{ "vpmullq $dst,$src,$mem\t! mul packed2L" %} 6989 ins_encode %{ 6990 int vector_len = 0; 6991 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6992 %} 6993 ins_pipe( pipe_slow ); 6994 %} 6995 6996 instruct vmul4L_reg(vecY dst, vecY src1, vecY src2) %{ 6997 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq()); 6998 match(Set dst (MulVL src1 src2)); 6999 format %{ "vpmullq $dst,$src1,$src2\t! mul packed4L" %} 7000 ins_encode %{ 7001 int vector_len = 1; 7002 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7003 %} 7004 ins_pipe( pipe_slow ); 7005 %} 7006 7007 instruct vmul4L_mem(vecY dst, vecY src, memory mem) %{ 7008 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq()); 7009 match(Set dst (MulVL src (LoadVector mem))); 7010 format %{ "vpmullq $dst,$src,$mem\t! mul packed4L" %} 7011 ins_encode %{ 7012 int vector_len = 1; 7013 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7014 %} 7015 ins_pipe( pipe_slow ); 7016 %} 7017 7018 instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7019 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq()); 7020 match(Set dst (MulVL src1 src2)); 7021 format %{ "vpmullq $dst,$src1,$src2\t! mul packed8L" %} 7022 ins_encode %{ 7023 int vector_len = 2; 7024 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7025 %} 7026 ins_pipe( pipe_slow ); 7027 %} 7028 7029 instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{ 7030 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq()); 7031 match(Set dst (MulVL src (LoadVector mem))); 7032 format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %} 7033 ins_encode %{ 7034 int vector_len = 2; 7035 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7036 %} 7037 ins_pipe( pipe_slow ); 7038 %} 7039 7040 instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{ 7041 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 7042 match(Set dst (MulVI src1 src2)); 7043 format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %} 7044 ins_encode %{ 7045 int vector_len = 1; 7046 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7047 %} 7048 ins_pipe( pipe_slow ); 7049 %} 7050 7051 instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{ 7052 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 7053 match(Set dst (MulVI src (LoadVector mem))); 7054 format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %} 7055 ins_encode %{ 7056 int vector_len = 1; 7057 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7058 %} 7059 ins_pipe( pipe_slow ); 7060 %} 7061 7062 instruct vmul16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7063 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7064 match(Set dst (MulVI src1 src2)); 7065 format %{ "vpmulld $dst,$src1,$src2\t! 
mul packed16I" %} 7066 ins_encode %{ 7067 int vector_len = 2; 7068 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7069 %} 7070 ins_pipe( pipe_slow ); 7071 %} 7072 7073 instruct vmul16I_mem(vecZ dst, vecZ src, memory mem) %{ 7074 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7075 match(Set dst (MulVI src (LoadVector mem))); 7076 format %{ "vpmulld $dst,$src,$mem\t! mul packed16I" %} 7077 ins_encode %{ 7078 int vector_len = 2; 7079 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7080 %} 7081 ins_pipe( pipe_slow ); 7082 %} 7083 7084 // Floats vector mul 7085 instruct vmul2F(vecD dst, vecD src) %{ 7086 predicate(n->as_Vector()->length() == 2); 7087 match(Set dst (MulVF dst src)); 7088 format %{ "mulps $dst,$src\t! mul packed2F" %} 7089 ins_encode %{ 7090 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 7091 %} 7092 ins_pipe( pipe_slow ); 7093 %} 7094 7095 instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{ 7096 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7097 match(Set dst (MulVF src1 src2)); 7098 format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %} 7099 ins_encode %{ 7100 int vector_len = 0; 7101 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7102 %} 7103 ins_pipe( pipe_slow ); 7104 %} 7105 7106 instruct vmul2F_mem(vecD dst, vecD src, memory mem) %{ 7107 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7108 match(Set dst (MulVF src (LoadVector mem))); 7109 format %{ "vmulps $dst,$src,$mem\t! mul packed2F" %} 7110 ins_encode %{ 7111 int vector_len = 0; 7112 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7113 %} 7114 ins_pipe( pipe_slow ); 7115 %} 7116 7117 instruct vmul4F(vecX dst, vecX src) %{ 7118 predicate(n->as_Vector()->length() == 4); 7119 match(Set dst (MulVF dst src)); 7120 format %{ "mulps $dst,$src\t! mul packed4F" %} 7121 ins_encode %{ 7122 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 7123 %} 7124 ins_pipe( pipe_slow ); 7125 %} 7126 7127 instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{ 7128 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7129 match(Set dst (MulVF src1 src2)); 7130 format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %} 7131 ins_encode %{ 7132 int vector_len = 0; 7133 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7134 %} 7135 ins_pipe( pipe_slow ); 7136 %} 7137 7138 instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{ 7139 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7140 match(Set dst (MulVF src (LoadVector mem))); 7141 format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %} 7142 ins_encode %{ 7143 int vector_len = 0; 7144 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7145 %} 7146 ins_pipe( pipe_slow ); 7147 %} 7148 7149 instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{ 7150 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7151 match(Set dst (MulVF src1 src2)); 7152 format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %} 7153 ins_encode %{ 7154 int vector_len = 1; 7155 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7156 %} 7157 ins_pipe( pipe_slow ); 7158 %} 7159 7160 instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{ 7161 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7162 match(Set dst (MulVF src (LoadVector mem))); 7163 format %{ "vmulps $dst,$src,$mem\t! 
// Floats vector mul
instruct vmul2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed2F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed4F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector mul
instruct vmul2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD dst src));
  format %{ "mulpd $dst,$src\t! mul packed2D" %}
  ins_encode %{
    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
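
// Illustrative sketch (hypothetical Java, not part of the matcher): an
// elementwise product like the loop below may become MulVF nodes matched by
// the vmulps rules above; double data maps to MulVD/vmulpd the same way.
// Packed FP multiplies apply IEEE semantics to each lane independently, so
// the vector form computes the same values as the scalar loop.
//
//   // float[] x, y, z of equal length
//   for (int i = 0; i < z.length; i++) {
//     z[i] = x[i] * y[i];   // MulVF -> mulps/vmulps
//   }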
// --------------------------------- DIV --------------------------------------

// Floats vector div
instruct vdiv2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed2F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed4F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
// Doubles vector div
instruct vdiv2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD dst src));
  format %{ "divpd $dst,$src\t! div packed2D" %}
  ins_encode %{
    __ divpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
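
// Note that the DIV section covers only floats and doubles: SSE/AVX provide
// no packed integer divide instruction, so there are no DivVI/DivVL rules
// here and integer division loops remain scalar. Illustrative sketch
// (hypothetical Java):
//
//   // float[] p, q, r of equal length
//   for (int i = 0; i < r.length; i++) {
//     r[i] = p[i] / q[i];   // DivVF -> divps/vdivps
//   }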
// ------------------------------ Shift ---------------------------------------

// Left and right shift count vectors are the same on x86
// (only the low 64 bits of the xmm register are used for the count).
instruct vshiftcnt(vecS dst, rRegI cnt) %{
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "movd $dst,$cnt\t! load shift count" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $cnt$$Register);
  %}
  ins_pipe( pipe_slow );
%}
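
// Illustrative sketch (hypothetical Java, not part of the matcher): a
// non-constant, loop-invariant shift count is loaded once into an xmm
// register by the vshiftcnt rule above, and the *_reg shift rules below
// then apply that single count to every lane; compile-time-constant counts
// match the *_imm rules instead.
//
//   // int[] a, b; s is a variable shift count
//   for (int i = 0; i < a.length; i++) {
//     b[i] = a[i] << s;     // LShiftCntV(s) + LShiftVI: movd + pslld
//   }
//   // b[i] = a[i] << 3;    // constant count: pslld $dst,3 (vsll*_imm)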
// ------------------------------ LeftShift -----------------------------------

// Shorts/Chars vector left shift
instruct vsll2S(vecS dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_imm(vecS dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsll8S(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
// Integers vector left shift
instruct vsll2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
// Longs vector left shift
instruct vsll2L(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8L_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ----------------------- LogicalRightShift -----------------------------------

// Shorts vector logical right shift produces an incorrect Java result
// for negative data because Java code converts a short value into an int
// with sign extension before the shift. But char vectors are fine since
// chars are unsigned values.
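
// Worked example (illustrative values): for short s = -1, Java evaluates
// s >>> 3 on the sign-extended int 0xFFFFFFFF, giving 0x1FFFFFFF, so
// (short)(s >>> 3) is still -1; a packed psrlw on the 16-bit lane 0xFFFF
// would produce 0x1FFF (8191) instead. For char c = '\uFFFF' the
// zero-extended value 0x0000FFFF >>> 3 is 0x1FFF, which matches the packed
// result, so these rules are safe for char data.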
instruct vsrl2S(vecS dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_imm(vecS dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector logical right shift
instruct vsrl2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsrl4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
// Longs vector logical right shift
instruct vsrl2L(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8L_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
// ------------------- ArithmeticRightShift -----------------------------------

// Shorts/Chars vector arithmetic right shift
instruct vsra2S(vecS dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_imm(vecS dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsra8S(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
// Integers vector arithmetic right shift
instruct vsra2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// There are no longs vector arithmetic right shift instructions.
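
// SSE/AVX/AVX2 provide no packed arithmetic right shift for 64-bit lanes
// (AVX-512 adds vpsraq, which these rules do not use), so a long[] shift
// loop such as the hypothetical sketch below is left in scalar form rather
// than matched as an RShiftVL node:
//
//   for (int i = 0; i < a.length; i++) {
//     b[i] = a[i] >> 2;   // stays scalar; no RShiftVL rules exist here
//   }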
// --------------------------------- AND --------------------------------------

instruct vand4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (4 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (8 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (16 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
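
// The bitwise sections match on length_in_bytes() rather than element count
// because pand/vpand (and the por/pxor families below) operate on raw bits,
// so one rule per vector width serves every element type. Illustrative
// sketch (hypothetical Java):
//
//   // int[] a, b; byte[]/short[]/long[] data uses the same rules
//   for (int i = 0; i < a.length; i++) {
//     b[i] = a[i] & 0x7fffffff;   // AndV -> pand/vpand
//   }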

// --------------------------------- OR ---------------------------------------

instruct vor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (4 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (8 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (16 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
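
// Illustration (hypothetical Java source, not part of this file): C2's
// SuperWord pass turns a byte-array loop such as
//
//   for (int i = 0; i < a.length; i++) a[i] ^= b[i];
//
// into XorV/LoadVector ideal nodes, which the rules below then match to
// pxor/vpxor instructions of the appropriate width.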

// --------------------------------- XOR --------------------------------------

instruct vxor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
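
// As with AND and OR above, there are no dst = dst OP src rules at 32 and
// 64 bytes: the VEX/EVEX encodings are inherently three-operand, so the
// matcher always has the non-destructive _reg form available.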

instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}