//
// Copyright (c) 2011, 2014, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers. 512-bit registers, i.e. 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperWord flags).
// For pre-EVEX enabled architectures:
//      XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX enabled architectures:
//      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
//
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM31 preserved across function calls
//              XMM0-XMM3 might hold parameters
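
// The definitions below all follow one pattern: each 512-bit register is
// described as sixteen consecutive 32-bit VMReg slots, where next(n) names
// the n-th slot after the base. An illustrative sketch (this only restates
// the pattern of the XMM0 block that follows; it defines nothing new):
//
//   xmm0->as_VMReg()            // slot a - holds a Float
//   xmm0->as_VMReg()->next(1)   // slot b - slots ab together hold a Double
//   ...
//   xmm0->as_VMReg()->next(15)  // slot p - last word of the 512-bit register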

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

#ifdef _WIN64
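
// Windows only: per the ABI note above, XMM6-XMM31 survive function calls,
// so the C convention save type in the second column is SOE rather than SOC.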

reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(15));

reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(15));

#else // _WIN64
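
// Non-Windows: per the Linux ABI note above, no XMM register is preserved
// across function calls, so every slot is save-on-call in both columns.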

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

#ifdef _LP64
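
// XMM8 and above exist only in 64-bit mode; as noted in the header comment
// they require a REX (or VEX/EVEX) prefix to encode, hence the _LP64 guard.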

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));
xmm23->as_VMReg()->next(8)); 924 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9)); 925 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10)); 926 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11)); 927 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12)); 928 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13)); 929 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14)); 930 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15)); 931 932 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()); 933 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1)); 934 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2)); 935 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3)); 936 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4)); 937 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5)); 938 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6)); 939 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7)); 940 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8)); 941 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9)); 942 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10)); 943 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11)); 944 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12)); 945 reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13)); 946 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14)); 947 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15)); 948 949 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()); 950 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1)); 951 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2)); 952 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3)); 953 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4)); 954 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5)); 955 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6)); 956 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7)); 957 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8)); 958 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9)); 959 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10)); 960 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11)); 961 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12)); 962 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13)); 963 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14)); 964 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15)); 965 966 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()); 967 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1)); 968 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2)); 969 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3)); 970 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4)); 971 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5)); 972 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6)); 973 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7)); 974 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8)); 975 reg_def XMM26j( SOC, SOC, Op_RegF, 26, 
xmm26->as_VMReg()->next(9)); 976 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10)); 977 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11)); 978 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12)); 979 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13)); 980 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14)); 981 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15)); 982 983 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()); 984 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1)); 985 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2)); 986 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3)); 987 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4)); 988 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5)); 989 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6)); 990 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7)); 991 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8)); 992 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9)); 993 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10)); 994 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11)); 995 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12)); 996 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13)); 997 reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14)); 998 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15)); 999 1000 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()); 1001 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1)); 1002 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2)); 1003 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3)); 1004 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4)); 1005 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5)); 1006 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6)); 1007 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7)); 1008 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8)); 1009 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9)); 1010 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10)); 1011 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11)); 1012 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12)); 1013 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13)); 1014 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14)); 1015 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15)); 1016 1017 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()); 1018 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1)); 1019 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2)); 1020 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3)); 1021 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4)); 1022 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5)); 1023 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6)); 1024 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7)); 1025 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8)); 1026 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9)); 1027 reg_def XMM29k( SOC, SOC, Op_RegF, 
29, xmm29->as_VMReg()->next(10)); 1028 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11)); 1029 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12)); 1030 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13)); 1031 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14)); 1032 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15)); 1033 1034 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()); 1035 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1)); 1036 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2)); 1037 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3)); 1038 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4)); 1039 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5)); 1040 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6)); 1041 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7)); 1042 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8)); 1043 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9)); 1044 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10)); 1045 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11)); 1046 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12)); 1047 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13)); 1048 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14)); 1049 reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15)); 1050 1051 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()); 1052 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1)); 1053 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2)); 1054 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3)); 1055 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4)); 1056 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5)); 1057 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6)); 1058 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7)); 1059 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8)); 1060 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9)); 1061 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10)); 1062 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11)); 1063 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12)); 1064 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13)); 1065 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14)); 1066 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15)); 1067 1068 #endif // _LP64 1069 1070 #endif // _WIN64 1071 1072 #ifdef _LP64 1073 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad()); 1074 #else 1075 reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad()); 1076 #endif // _LP64 1077 1078 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1079 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1080 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1081 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1082 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, 
XMM4o, XMM4p, 1083 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1084 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1085 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1086 #ifdef _LP64 1087 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1088 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1089 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1090 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1091 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1092 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1093 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1094 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1095 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 1096 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 1097 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 1098 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 1099 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 1100 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 1101 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 1102 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 1103 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 1104 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 1105 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 1106 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 1107 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 1108 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 1109 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1110 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1111 #endif 1112 ); 1113 1114 // flags 
allocation class should be last. 1115 alloc_class chunk2(RFLAGS); 1116 1117 // Singleton class for condition codes 1118 reg_class int_flags(RFLAGS); 1119 1120 // Class for pre evex float registers 1121 reg_class float_reg_legacy(XMM0, 1122 XMM1, 1123 XMM2, 1124 XMM3, 1125 XMM4, 1126 XMM5, 1127 XMM6, 1128 XMM7 1129 #ifdef _LP64 1130 ,XMM8, 1131 XMM9, 1132 XMM10, 1133 XMM11, 1134 XMM12, 1135 XMM13, 1136 XMM14, 1137 XMM15 1138 #endif 1139 ); 1140 1141 // Class for evex float registers 1142 reg_class float_reg_evex(XMM0, 1143 XMM1, 1144 XMM2, 1145 XMM3, 1146 XMM4, 1147 XMM5, 1148 XMM6, 1149 XMM7 1150 #ifdef _LP64 1151 ,XMM8, 1152 XMM9, 1153 XMM10, 1154 XMM11, 1155 XMM12, 1156 XMM13, 1157 XMM14, 1158 XMM15, 1159 XMM16, 1160 XMM17, 1161 XMM18, 1162 XMM19, 1163 XMM20, 1164 XMM21, 1165 XMM22, 1166 XMM23, 1167 XMM24, 1168 XMM25, 1169 XMM26, 1170 XMM27, 1171 XMM28, 1172 XMM29, 1173 XMM30, 1174 XMM31 1175 #endif 1176 ); 1177 1178 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} ); 1179 1180 // Class for pre evex double registers 1181 reg_class double_reg_legacy(XMM0, XMM0b, 1182 XMM1, XMM1b, 1183 XMM2, XMM2b, 1184 XMM3, XMM3b, 1185 XMM4, XMM4b, 1186 XMM5, XMM5b, 1187 XMM6, XMM6b, 1188 XMM7, XMM7b 1189 #ifdef _LP64 1190 ,XMM8, XMM8b, 1191 XMM9, XMM9b, 1192 XMM10, XMM10b, 1193 XMM11, XMM11b, 1194 XMM12, XMM12b, 1195 XMM13, XMM13b, 1196 XMM14, XMM14b, 1197 XMM15, XMM15b 1198 #endif 1199 ); 1200 1201 // Class for evex double registers 1202 reg_class double_reg_evex(XMM0, XMM0b, 1203 XMM1, XMM1b, 1204 XMM2, XMM2b, 1205 XMM3, XMM3b, 1206 XMM4, XMM4b, 1207 XMM5, XMM5b, 1208 XMM6, XMM6b, 1209 XMM7, XMM7b 1210 #ifdef _LP64 1211 ,XMM8, XMM8b, 1212 XMM9, XMM9b, 1213 XMM10, XMM10b, 1214 XMM11, XMM11b, 1215 XMM12, XMM12b, 1216 XMM13, XMM13b, 1217 XMM14, XMM14b, 1218 XMM15, XMM15b, 1219 XMM16, XMM16b, 1220 XMM17, XMM17b, 1221 XMM18, XMM18b, 1222 XMM19, XMM19b, 1223 XMM20, XMM20b, 1224 XMM21, XMM21b, 1225 XMM22, XMM22b, 1226 XMM23, XMM23b, 1227 XMM24, XMM24b, 1228 XMM25, XMM25b, 1229 XMM26, XMM26b, 1230 XMM27, XMM27b, 1231 XMM28, XMM28b, 1232 XMM29, XMM29b, 1233 XMM30, XMM30b, 1234 XMM31, XMM31b 1235 #endif 1236 ); 1237 1238 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} ); 1239 1240 // Class for pre evex 32bit vector registers 1241 reg_class vectors_reg_legacy(XMM0, 1242 XMM1, 1243 XMM2, 1244 XMM3, 1245 XMM4, 1246 XMM5, 1247 XMM6, 1248 XMM7 1249 #ifdef _LP64 1250 ,XMM8, 1251 XMM9, 1252 XMM10, 1253 XMM11, 1254 XMM12, 1255 XMM13, 1256 XMM14, 1257 XMM15 1258 #endif 1259 ); 1260 1261 // Class for evex 32bit vector registers 1262 reg_class vectors_reg_evex(XMM0, 1263 XMM1, 1264 XMM2, 1265 XMM3, 1266 XMM4, 1267 XMM5, 1268 XMM6, 1269 XMM7 1270 #ifdef _LP64 1271 ,XMM8, 1272 XMM9, 1273 XMM10, 1274 XMM11, 1275 XMM12, 1276 XMM13, 1277 XMM14, 1278 XMM15, 1279 XMM16, 1280 XMM17, 1281 XMM18, 1282 XMM19, 1283 XMM20, 1284 XMM21, 1285 XMM22, 1286 XMM23, 1287 XMM24, 1288 XMM25, 1289 XMM26, 1290 XMM27, 1291 XMM28, 1292 XMM29, 1293 XMM30, 1294 XMM31 1295 #endif 1296 ); 1297 1298 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} ); 1299 1300 // Class for all 64bit vector registers 1301 reg_class vectord_reg_legacy(XMM0, XMM0b, 1302 XMM1, XMM1b, 1303 XMM2, XMM2b, 1304 XMM3, XMM3b, 1305 XMM4, XMM4b, 1306 XMM5, XMM5b, 1307 XMM6, XMM6b, 1308 XMM7, XMM7b 1309 #ifdef _LP64 1310 ,XMM8, XMM8b, 1311 XMM9, XMM9b, 1312 XMM10, XMM10b, 1313 XMM11, XMM11b, 1314 XMM12, XMM12b, 1315 XMM13, XMM13b, 1316 
XMM14, XMM14b, 1317 XMM15, XMM15b 1318 #endif 1319 ); 1320 1321 // Class for all 64bit vector registers 1322 reg_class vectord_reg_evex(XMM0, XMM0b, 1323 XMM1, XMM1b, 1324 XMM2, XMM2b, 1325 XMM3, XMM3b, 1326 XMM4, XMM4b, 1327 XMM5, XMM5b, 1328 XMM6, XMM6b, 1329 XMM7, XMM7b 1330 #ifdef _LP64 1331 ,XMM8, XMM8b, 1332 XMM9, XMM9b, 1333 XMM10, XMM10b, 1334 XMM11, XMM11b, 1335 XMM12, XMM12b, 1336 XMM13, XMM13b, 1337 XMM14, XMM14b, 1338 XMM15, XMM15b, 1339 XMM16, XMM16b, 1340 XMM17, XMM17b, 1341 XMM18, XMM18b, 1342 XMM19, XMM19b, 1343 XMM20, XMM20b, 1344 XMM21, XMM21b, 1345 XMM22, XMM22b, 1346 XMM23, XMM23b, 1347 XMM24, XMM24b, 1348 XMM25, XMM25b, 1349 XMM26, XMM26b, 1350 XMM27, XMM27b, 1351 XMM28, XMM28b, 1352 XMM29, XMM29b, 1353 XMM30, XMM30b, 1354 XMM31, XMM31b 1355 #endif 1356 ); 1357 1358 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} ); 1359 1360 // Class for all 128bit vector registers 1361 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, 1362 XMM1, XMM1b, XMM1c, XMM1d, 1363 XMM2, XMM2b, XMM2c, XMM2d, 1364 XMM3, XMM3b, XMM3c, XMM3d, 1365 XMM4, XMM4b, XMM4c, XMM4d, 1366 XMM5, XMM5b, XMM5c, XMM5d, 1367 XMM6, XMM6b, XMM6c, XMM6d, 1368 XMM7, XMM7b, XMM7c, XMM7d 1369 #ifdef _LP64 1370 ,XMM8, XMM8b, XMM8c, XMM8d, 1371 XMM9, XMM9b, XMM9c, XMM9d, 1372 XMM10, XMM10b, XMM10c, XMM10d, 1373 XMM11, XMM11b, XMM11c, XMM11d, 1374 XMM12, XMM12b, XMM12c, XMM12d, 1375 XMM13, XMM13b, XMM13c, XMM13d, 1376 XMM14, XMM14b, XMM14c, XMM14d, 1377 XMM15, XMM15b, XMM15c, XMM15d 1378 #endif 1379 ); 1380 1381 // Class for all 128bit vector registers 1382 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, 1383 XMM1, XMM1b, XMM1c, XMM1d, 1384 XMM2, XMM2b, XMM2c, XMM2d, 1385 XMM3, XMM3b, XMM3c, XMM3d, 1386 XMM4, XMM4b, XMM4c, XMM4d, 1387 XMM5, XMM5b, XMM5c, XMM5d, 1388 XMM6, XMM6b, XMM6c, XMM6d, 1389 XMM7, XMM7b, XMM7c, XMM7d 1390 #ifdef _LP64 1391 ,XMM8, XMM8b, XMM8c, XMM8d, 1392 XMM9, XMM9b, XMM9c, XMM9d, 1393 XMM10, XMM10b, XMM10c, XMM10d, 1394 XMM11, XMM11b, XMM11c, XMM11d, 1395 XMM12, XMM12b, XMM12c, XMM12d, 1396 XMM13, XMM13b, XMM13c, XMM13d, 1397 XMM14, XMM14b, XMM14c, XMM14d, 1398 XMM15, XMM15b, XMM15c, XMM15d, 1399 XMM16, XMM16b, XMM16c, XMM16d, 1400 XMM17, XMM17b, XMM17c, XMM17d, 1401 XMM18, XMM18b, XMM18c, XMM18d, 1402 XMM19, XMM19b, XMM19c, XMM19d, 1403 XMM20, XMM20b, XMM20c, XMM20d, 1404 XMM21, XMM21b, XMM21c, XMM21d, 1405 XMM22, XMM22b, XMM22c, XMM22d, 1406 XMM23, XMM23b, XMM23c, XMM23d, 1407 XMM24, XMM24b, XMM24c, XMM24d, 1408 XMM25, XMM25b, XMM25c, XMM25d, 1409 XMM26, XMM26b, XMM26c, XMM26d, 1410 XMM27, XMM27b, XMM27c, XMM27d, 1411 XMM28, XMM28b, XMM28c, XMM28d, 1412 XMM29, XMM29b, XMM29c, XMM29d, 1413 XMM30, XMM30b, XMM30c, XMM30d, 1414 XMM31, XMM31b, XMM31c, XMM31d 1415 #endif 1416 ); 1417 1418 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} ); 1419 1420 // Class for all 256bit vector registers 1421 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1422 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1423 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1424 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 1425 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1426 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1427 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1428 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1429 #ifdef _LP64 1430 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1431 XMM9, XMM9b, 
XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1432 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1433 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1434 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1435 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1436 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1437 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h 1438 #endif 1439 ); 1440 1441 // Class for all 256bit vector registers 1442 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1443 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1444 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1445 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 1446 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1447 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1448 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1449 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1450 #ifdef _LP64 1451 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1452 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1453 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1454 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1455 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1456 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1457 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1458 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, 1459 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, 1460 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, 1461 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, 1462 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, 1463 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, 1464 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, 1465 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, 1466 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, 1467 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, 1468 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, 1469 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, 1470 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, 1471 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, 1472 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, 1473 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, 1474 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h 1475 #endif 1476 ); 1477 1478 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} ); 1479 1480 // Class for all 512bit vector registers 1481 reg_class vectorz_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1482 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1483 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1484 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1485 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1486 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, 
XMM5o, XMM5p, 1487 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1488 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1489 #ifdef _LP64 1490 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1491 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1492 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1493 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1494 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1495 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1496 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1497 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1498 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 1499 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 1500 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 1501 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 1502 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 1503 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 1504 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 1505 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 1506 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 1507 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 1508 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 1509 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 1510 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 1511 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 1512 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1513 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1514 #endif 1515 ); 1516 1517 %} 1518 1519 1520 //----------SOURCE BLOCK------------------------------------------------------- 1521 // This is a block of 
C++ code which provides values, functions, and 1522 // definitions necessary in the rest of the architecture description 1523 1524 source_hpp %{ 1525 // Header information of the source block. 1526 // Method declarations/definitions which are used outside 1527 // the ad-scope can conveniently be defined here. 1528 // 1529 // To keep related declarations/definitions/uses close together, 1530 // we switch between source %{ }% and source_hpp %{ }% freely as needed. 1531 1532 class NativeJump; 1533 1534 class CallStubImpl { 1535 1536 //-------------------------------------------------------------- 1537 //---< Used for optimization in Compile::shorten_branches >--- 1538 //-------------------------------------------------------------- 1539 1540 public: 1541 // Size of call trampoline stub. 1542 static uint size_call_trampoline() { 1543 return 0; // no call trampolines on this platform 1544 } 1545 1546 // Number of relocations needed by a call trampoline stub. 1547 static uint reloc_call_trampoline() { 1548 return 0; // no call trampolines on this platform 1549 } 1550 }; 1551 1552 class HandlerImpl { 1553 1554 public: 1555 1556 static int emit_exception_handler(CodeBuffer &cbuf); 1557 static int emit_deopt_handler(CodeBuffer& cbuf); 1558 1559 static uint size_exception_handler() { 1560 // NativeCall instruction size is the same as NativeJump. 1561 // The exception handler starts out as a jump and can be patched to 1562 // a call by deoptimization. (4932387) 1563 // Note that this value is also credited (in output.cpp) to 1564 // the size of the code section. 1565 return NativeJump::instruction_size; 1566 } 1567 1568 #ifdef _LP64 1569 static uint size_deopt_handler() { 1570 // three 5 byte instructions 1571 return 15; 1572 } 1573 #else 1574 static uint size_deopt_handler() { 1575 // NativeCall instruction size is the same as NativeJump. 1576 // The exception handler starts out as a jump and can be patched to 1577 // a call by deoptimization. (4932387) 1578 // Note that this value is also credited (in output.cpp) to 1579 // the size of the code section. 1580 return 5 + NativeJump::instruction_size; // pushl(); jmp; 1581 } 1582 #endif 1583 }; 1584 1585 %} // end source_hpp 1586 1587 source %{ 1588 1589 // Emit exception handler code. 1590 // Stuff framesize into a register and call a VM stub routine. 1591 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { 1592 1593 // Note that the code buffer's insts_mark is always relative to insts. 1594 // That's why we must use the macroassembler to generate a handler. 1595 MacroAssembler _masm(&cbuf); 1596 address base = __ start_a_stub(size_exception_handler()); 1597 if (base == NULL) return 0; // CodeBuffer::expand failed 1598 int offset = __ offset(); 1599 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); 1600 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); 1601 __ end_a_stub(); 1602 return offset; 1603 } 1604 1605 // Emit deopt handler code. 1606 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { 1607 1608 // Note that the code buffer's insts_mark is always relative to insts. 1609 // That's why we must use the macroassembler to generate a handler.
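// Editor's sketch (illustrative, not emitted by this file): the LP64 branch below
// pushes the handler's own PC without clobbering any register, since all registers
// may be live at a deopt point. The call to the immediately-following label pushes
// the address of "next" (which is the_pc plus the 5-byte call), and the subptr then
// subtracts the bytes emitted since the handler start from that stack slot:
//   call next                ; pushes return address = address of "next"
// next:
//   sub qword ptr [rsp], 5   ; 5 == __ offset() - offset here, leaving the_pc on the stack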
1610 MacroAssembler _masm(&cbuf); 1611 address base = __ start_a_stub(size_deopt_handler()); 1612 if (base == NULL) return 0; // CodeBuffer::expand failed 1613 int offset = __ offset(); 1614 1615 #ifdef _LP64 1616 address the_pc = (address) __ pc(); 1617 Label next; 1618 // push a "the_pc" on the stack without destroying any registers 1619 // as they all may be live. 1620 1621 // push address of "next" 1622 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32 1623 __ bind(next); 1624 // adjust it so it matches "the_pc" 1625 __ subptr(Address(rsp, 0), __ offset() - offset); 1626 #else 1627 InternalAddress here(__ pc()); 1628 __ pushptr(here.addr()); 1629 #endif 1630 1631 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); 1632 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); 1633 __ end_a_stub(); 1634 return offset; 1635 } 1636 1637 1638 //============================================================================= 1639 1640 // Float masks come from different places depending on platform. 1641 #ifdef _LP64 1642 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 1643 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 1644 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 1645 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 1646 #else 1647 static address float_signmask() { return (address)float_signmask_pool; } 1648 static address float_signflip() { return (address)float_signflip_pool; } 1649 static address double_signmask() { return (address)double_signmask_pool; } 1650 static address double_signflip() { return (address)double_signflip_pool; } 1651 #endif 1652 1653 1654 const bool Matcher::match_rule_supported(int opcode) { 1655 if (!has_match_rule(opcode)) 1656 return false; 1657 1658 switch (opcode) { 1659 case Op_PopCountI: 1660 case Op_PopCountL: 1661 if (!UsePopCountInstruction) 1662 return false; 1663 break; 1664 case Op_MulVI: 1665 if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX 1666 return false; 1667 break; 1668 case Op_MulVL: 1669 case Op_MulReductionVL: 1670 if (!VM_Version::supports_avx512dq()) 1671 return false; break; 1672 case Op_AddReductionVL: 1673 if (UseAVX < 3) // only EVEX: vector connectivity becomes an issue here 1674 return false; break; 1675 case Op_AddReductionVI: 1676 if (UseSSE < 3) // requires at least SSE3 1677 return false; break; 1678 case Op_MulReductionVI: 1679 if (UseSSE < 4) // requires at least SSE4 1680 return false; break; 1681 case Op_AddReductionVF: 1682 case Op_AddReductionVD: 1683 case Op_MulReductionVF: 1684 case Op_MulReductionVD: 1685 if (UseSSE < 1) // requires at least SSE 1686 return false; 1687 break; 1688 case Op_CompareAndSwapL: 1689 #ifdef _LP64 1690 case Op_CompareAndSwapP: 1691 #endif 1692 if (!VM_Version::supports_cx8()) 1693 return false; 1694 break; 1695 } 1696 1697 return true; // By default, match rules are supported. 1698 } 1699 1700 // Max vector size in bytes. 0 if not supported. 1701 const int Matcher::vector_width_in_bytes(BasicType bt) { 1702 assert(is_java_primitive(bt), "only primitive type vectors"); 1703 if (UseSSE < 2) return 0; 1704 // SSE2 supports 128bit vectors for all types. 1705 // AVX2 supports 256bit vectors for all types. 1706 // AVX512/EVEX supports 512bit vectors for all types. 1707 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16; 1708 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
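// Worked example (illustrative, assuming the flag semantics above): UseAVX == 2 gives
// a base size of (1 << 2) * 8 = 32 bytes, UseAVX == 3 gives (1 << 3) * 8 = 64 bytes,
// and UseAVX <= 1 leaves the SSE2 base of 16 bytes; the AVX1 adjustment below then
// widens only FLOAT and DOUBLE vectors.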
1709 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) 1710 size = (UseAVX > 2) ? 64 : 32; 1711 // Use flag to limit vector size. 1712 size = MIN2(size,(int)MaxVectorSize); 1713 // Minimum 2 values in vector (or 4 for bytes). 1714 switch (bt) { 1715 case T_DOUBLE: 1716 case T_LONG: 1717 if (size < 16) return 0; // fall through: a size passing the larger minimum also passes the smaller ones 1718 case T_FLOAT: 1719 case T_INT: 1720 if (size < 8) return 0; // fall through 1721 case T_BOOLEAN: 1722 case T_BYTE: 1723 case T_CHAR: 1724 case T_SHORT: 1725 if (size < 4) return 0; 1726 break; 1727 default: 1728 ShouldNotReachHere(); 1729 } 1730 return size; 1731 } 1732 1733 // Limits on vector size (number of elements) loaded into vector. 1734 const int Matcher::max_vector_size(const BasicType bt) { 1735 return vector_width_in_bytes(bt)/type2aelembytes(bt); 1736 } 1737 const int Matcher::min_vector_size(const BasicType bt) { 1738 int max_size = max_vector_size(bt); 1739 // Min size which can be loaded into vector is 4 bytes. 1740 int size = (type2aelembytes(bt) == 1) ? 4 : 2; 1741 return MIN2(size,max_size); 1742 } 1743 1744 // Vector ideal reg corresponding to specified size in bytes. 1745 const int Matcher::vector_ideal_reg(int size) { 1746 assert(MaxVectorSize >= size, ""); 1747 switch(size) { 1748 case 4: return Op_VecS; 1749 case 8: return Op_VecD; 1750 case 16: return Op_VecX; 1751 case 32: return Op_VecY; 1752 case 64: return Op_VecZ; 1753 } 1754 ShouldNotReachHere(); 1755 return 0; 1756 } 1757 1758 // Only lowest bits of xmm reg are used for vector shift count. 1759 const int Matcher::vector_shift_count_ideal_reg(int size) { 1760 return Op_VecS; 1761 } 1762 1763 // x86 supports misaligned vector stores/loads. 1764 const bool Matcher::misaligned_vectors_ok() { 1765 return !AlignVector; // can be changed by flag 1766 } 1767 1768 // x86 AES instructions are compatible with SunJCE expanded 1769 // keys, hence we do not need to pass the original key to stubs. 1770 const bool Matcher::pass_original_key_for_aes() { 1771 return false; 1772 } 1773 1774 // Helper methods for MachSpillCopyNode::implementation(). 1775 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 1776 int src_hi, int dst_hi, uint ireg, outputStream* st) { 1777 // In the 64-bit VM size calculation is very complex, so the size is 1778 // obtained by emitting the instructions into a scratch buffer and measuring them. 1779 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 1780 assert(ireg == Op_VecS || // 32bit vector 1781 (src_lo & 1) == 0 && (src_lo + 1) == src_hi && 1782 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi, 1783 "no non-adjacent vector moves" ); 1784 if (cbuf) { 1785 MacroAssembler _masm(cbuf); 1786 int offset = __ offset(); 1787 switch (ireg) { 1788 case Op_VecS: // copy whole register 1789 case Op_VecD: 1790 case Op_VecX: 1791 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1792 break; 1793 case Op_VecY: 1794 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1795 break; 1796 case Op_VecZ: 1797 __ evmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2); 1798 break; 1799 default: 1800 ShouldNotReachHere(); 1801 } 1802 int size = __ offset() - offset; 1803 #ifdef ASSERT 1804 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
1805 assert(!do_size || size == 4, "incorrect size calculation"); 1806 #endif 1807 return size; 1808 #ifndef PRODUCT 1809 } else if (!do_size) { 1810 switch (ireg) { 1811 case Op_VecS: 1812 case Op_VecD: 1813 case Op_VecX: 1814 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 1815 break; 1816 case Op_VecY: 1817 case Op_VecZ: 1818 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 1819 break; 1820 default: 1821 ShouldNotReachHere(); 1822 } 1823 #endif 1824 } 1825 // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix. 1826 return (UseAVX > 2) ? 6 : 4; 1827 } 1828 1829 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, 1830 int stack_offset, int reg, uint ireg, outputStream* st) { 1831 // In the 64-bit VM size calculation is very complex, so the size is 1832 // obtained by emitting the instructions into a scratch buffer and measuring them. 1833 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 1834 if (cbuf) { 1835 MacroAssembler _masm(cbuf); 1836 int offset = __ offset(); 1837 if (is_load) { 1838 switch (ireg) { 1839 case Op_VecS: 1840 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1841 break; 1842 case Op_VecD: 1843 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1844 break; 1845 case Op_VecX: 1846 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1847 break; 1848 case Op_VecY: 1849 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1850 break; 1851 case Op_VecZ: 1852 __ evmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); 1853 break; 1854 default: 1855 ShouldNotReachHere(); 1856 } 1857 } else { // store 1858 switch (ireg) { 1859 case Op_VecS: 1860 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1861 break; 1862 case Op_VecD: 1863 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1864 break; 1865 case Op_VecX: 1866 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1867 break; 1868 case Op_VecY: 1869 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1870 break; 1871 case Op_VecZ: 1872 __ evmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); 1873 break; 1874 default: 1875 ShouldNotReachHere(); 1876 } 1877 } 1878 int size = __ offset() - offset; 1879 #ifdef ASSERT 1880 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4); 1881 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
1882 assert(!do_size || size == (5+offset_size), "incorrect size calculation"); 1883 #endif 1884 return size; 1885 #ifndef PRODUCT 1886 } else if (!do_size) { 1887 if (is_load) { 1888 switch (ireg) { 1889 case Op_VecS: 1890 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1891 break; 1892 case Op_VecD: 1893 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1894 break; 1895 case Op_VecX: 1896 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1897 break; 1898 case Op_VecY: 1899 case Op_VecZ: 1900 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1901 break; 1902 default: 1903 ShouldNotReachHere(); 1904 } 1905 } else { // store 1906 switch (ireg) { 1907 case Op_VecS: 1908 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1909 break; 1910 case Op_VecD: 1911 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1912 break; 1913 case Op_VecX: 1914 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1915 break; 1916 case Op_VecY: 1917 case Op_VecZ: 1918 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1919 break; 1920 default: 1921 ShouldNotReachHere(); 1922 } 1923 } 1924 #endif 1925 } 1926 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4); 1927 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 1928 return 5+offset_size; 1929 } 1930 1931 static inline jfloat replicate4_imm(int con, int width) { 1932 // Load a constant of "width" (in bytes) and replicate it to fill 32 bits. 1933 assert(width == 1 || width == 2, "only byte or short types here"); 1934 int bit_width = width * 8; 1935 jint val = con; 1936 val &= (1 << bit_width) - 1; // mask off sign bits 1937 while (bit_width < 32) { 1938 val |= (val << bit_width); 1939 bit_width <<= 1; 1940 } 1941 jfloat fval = *((jfloat*) &val); // coerce to float type 1942 return fval; 1943 } 1944 1945 static inline jdouble replicate8_imm(int con, int width) { 1946 // Load a constant of "width" (in bytes) and replicate it to fill 64 bits.
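// Worked example (illustrative): replicate8_imm(0xAB, 1) masks the constant to 0xAB,
// then doubles the pattern up to the 64-bit value 0xABABABABABABABAB and returns it
// reinterpreted as a jdouble. replicate4_imm above behaves the same way for 32 bits,
// e.g. replicate4_imm(0xAB, 1) yields the bit pattern 0xABABABAB as a jfloat.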
1947 assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here"); 1948 int bit_width = width * 8; 1949 jlong val = con; 1950 val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits 1951 while(bit_width < 64) { 1952 val |= (val << bit_width); 1953 bit_width <<= 1; 1954 } 1955 jdouble dval = *((jdouble*) &val); // coerce to double type 1956 return dval; 1957 } 1958 1959 #ifndef PRODUCT 1960 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 1961 st->print("nop \t# %d bytes pad for loops and calls", _count); 1962 } 1963 #endif 1964 1965 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { 1966 MacroAssembler _masm(&cbuf); 1967 __ nop(_count); 1968 } 1969 1970 uint MachNopNode::size(PhaseRegAlloc*) const { 1971 return _count; 1972 } 1973 1974 #ifndef PRODUCT 1975 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 1976 st->print("# breakpoint"); 1977 } 1978 #endif 1979 1980 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { 1981 MacroAssembler _masm(&cbuf); 1982 __ int3(); 1983 } 1984 1985 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 1986 return MachNode::size(ra_); 1987 } 1988 1989 %} 1990 1991 encode %{ 1992 1993 enc_class call_epilog %{ 1994 if (VerifyStackAtCalls) { 1995 // Check that stack depth is unchanged: find majik cookie on stack 1996 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 1997 MacroAssembler _masm(&cbuf); 1998 Label L; 1999 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); 2000 __ jccb(Assembler::equal, L); 2001 // Die if stack mismatch 2002 __ int3(); 2003 __ bind(L); 2004 } 2005 %} 2006 2007 %} 2008 2009 2010 //----------OPERANDS----------------------------------------------------------- 2011 // Operand definitions must precede instruction definitions for correct parsing 2012 // in the ADLC because operands constitute user defined types which are used in 2013 // instruction definitions. 
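// Purely illustrative (hypothetical instruction, not defined in this file): once an
// operand such as vecZ below exists, instruct definitions can use it as a parameter
// type, e.g.
//   instruct exampleAddVZ(vecZ dst, vecZ src) %{ ... %}
// which is why operand definitions must be parsed before the instructions that use them.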
2014 2015 // This one generically applies only for evex, so only one version 2016 operand vecZ() %{ 2017 constraint(ALLOC_IN_RC(vectorz_reg)); 2018 match(VecZ); 2019 2020 format %{ %} 2021 interface(REG_INTER); 2022 %} 2023 2024 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 2025 2026 // ============================================================================ 2027 2028 instruct ShouldNotReachHere() %{ 2029 match(Halt); 2030 format %{ "int3\t# ShouldNotReachHere" %} 2031 ins_encode %{ 2032 __ int3(); 2033 %} 2034 ins_pipe(pipe_slow); 2035 %} 2036 2037 // ============================================================================ 2038 2039 instruct addF_reg(regF dst, regF src) %{ 2040 predicate((UseSSE>=1) && (UseAVX == 0)); 2041 match(Set dst (AddF dst src)); 2042 2043 format %{ "addss $dst, $src" %} 2044 ins_cost(150); 2045 ins_encode %{ 2046 __ addss($dst$$XMMRegister, $src$$XMMRegister); 2047 %} 2048 ins_pipe(pipe_slow); 2049 %} 2050 2051 instruct addF_mem(regF dst, memory src) %{ 2052 predicate((UseSSE>=1) && (UseAVX == 0)); 2053 match(Set dst (AddF dst (LoadF src))); 2054 2055 format %{ "addss $dst, $src" %} 2056 ins_cost(150); 2057 ins_encode %{ 2058 __ addss($dst$$XMMRegister, $src$$Address); 2059 %} 2060 ins_pipe(pipe_slow); 2061 %} 2062 2063 instruct addF_imm(regF dst, immF con) %{ 2064 predicate((UseSSE>=1) && (UseAVX == 0)); 2065 match(Set dst (AddF dst con)); 2066 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2067 ins_cost(150); 2068 ins_encode %{ 2069 __ addss($dst$$XMMRegister, $constantaddress($con)); 2070 %} 2071 ins_pipe(pipe_slow); 2072 %} 2073 2074 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 2075 predicate(UseAVX > 0); 2076 match(Set dst (AddF src1 src2)); 2077 2078 format %{ "vaddss $dst, $src1, $src2" %} 2079 ins_cost(150); 2080 ins_encode %{ 2081 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2082 %} 2083 ins_pipe(pipe_slow); 2084 %} 2085 2086 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 2087 predicate(UseAVX > 0); 2088 match(Set dst (AddF src1 (LoadF src2))); 2089 2090 format %{ "vaddss $dst, $src1, $src2" %} 2091 ins_cost(150); 2092 ins_encode %{ 2093 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2094 %} 2095 ins_pipe(pipe_slow); 2096 %} 2097 2098 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 2099 predicate(UseAVX > 0); 2100 match(Set dst (AddF src con)); 2101 2102 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2103 ins_cost(150); 2104 ins_encode %{ 2105 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2106 %} 2107 ins_pipe(pipe_slow); 2108 %} 2109 2110 instruct addD_reg(regD dst, regD src) %{ 2111 predicate((UseSSE>=2) && (UseAVX == 0)); 2112 match(Set dst (AddD dst src)); 2113 2114 format %{ "addsd $dst, $src" %} 2115 ins_cost(150); 2116 ins_encode %{ 2117 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 2118 %} 2119 ins_pipe(pipe_slow); 2120 %} 2121 2122 instruct addD_mem(regD dst, memory src) %{ 2123 predicate((UseSSE>=2) && (UseAVX == 0)); 2124 match(Set dst (AddD dst (LoadD src))); 2125 2126 format %{ "addsd $dst, $src" %} 2127 ins_cost(150); 2128 ins_encode %{ 2129 __ addsd($dst$$XMMRegister, $src$$Address); 2130 %} 2131 ins_pipe(pipe_slow); 2132 %} 2133 2134 instruct addD_imm(regD dst, immD con) %{ 2135 predicate((UseSSE>=2) && (UseAVX == 0)); 2136 match(Set dst (AddD dst con)); 2137 format %{ "addsd $dst, [$constantaddress]\t# load from 
constant table: double=$con" %} 2138 ins_cost(150); 2139 ins_encode %{ 2140 __ addsd($dst$$XMMRegister, $constantaddress($con)); 2141 %} 2142 ins_pipe(pipe_slow); 2143 %} 2144 2145 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 2146 predicate(UseAVX > 0); 2147 match(Set dst (AddD src1 src2)); 2148 2149 format %{ "vaddsd $dst, $src1, $src2" %} 2150 ins_cost(150); 2151 ins_encode %{ 2152 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2153 %} 2154 ins_pipe(pipe_slow); 2155 %} 2156 2157 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 2158 predicate(UseAVX > 0); 2159 match(Set dst (AddD src1 (LoadD src2))); 2160 2161 format %{ "vaddsd $dst, $src1, $src2" %} 2162 ins_cost(150); 2163 ins_encode %{ 2164 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2165 %} 2166 ins_pipe(pipe_slow); 2167 %} 2168 2169 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 2170 predicate(UseAVX > 0); 2171 match(Set dst (AddD src con)); 2172 2173 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2174 ins_cost(150); 2175 ins_encode %{ 2176 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2177 %} 2178 ins_pipe(pipe_slow); 2179 %} 2180 2181 instruct subF_reg(regF dst, regF src) %{ 2182 predicate((UseSSE>=1) && (UseAVX == 0)); 2183 match(Set dst (SubF dst src)); 2184 2185 format %{ "subss $dst, $src" %} 2186 ins_cost(150); 2187 ins_encode %{ 2188 __ subss($dst$$XMMRegister, $src$$XMMRegister); 2189 %} 2190 ins_pipe(pipe_slow); 2191 %} 2192 2193 instruct subF_mem(regF dst, memory src) %{ 2194 predicate((UseSSE>=1) && (UseAVX == 0)); 2195 match(Set dst (SubF dst (LoadF src))); 2196 2197 format %{ "subss $dst, $src" %} 2198 ins_cost(150); 2199 ins_encode %{ 2200 __ subss($dst$$XMMRegister, $src$$Address); 2201 %} 2202 ins_pipe(pipe_slow); 2203 %} 2204 2205 instruct subF_imm(regF dst, immF con) %{ 2206 predicate((UseSSE>=1) && (UseAVX == 0)); 2207 match(Set dst (SubF dst con)); 2208 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2209 ins_cost(150); 2210 ins_encode %{ 2211 __ subss($dst$$XMMRegister, $constantaddress($con)); 2212 %} 2213 ins_pipe(pipe_slow); 2214 %} 2215 2216 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 2217 predicate(UseAVX > 0); 2218 match(Set dst (SubF src1 src2)); 2219 2220 format %{ "vsubss $dst, $src1, $src2" %} 2221 ins_cost(150); 2222 ins_encode %{ 2223 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2224 %} 2225 ins_pipe(pipe_slow); 2226 %} 2227 2228 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 2229 predicate(UseAVX > 0); 2230 match(Set dst (SubF src1 (LoadF src2))); 2231 2232 format %{ "vsubss $dst, $src1, $src2" %} 2233 ins_cost(150); 2234 ins_encode %{ 2235 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2236 %} 2237 ins_pipe(pipe_slow); 2238 %} 2239 2240 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 2241 predicate(UseAVX > 0); 2242 match(Set dst (SubF src con)); 2243 2244 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2245 ins_cost(150); 2246 ins_encode %{ 2247 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2248 %} 2249 ins_pipe(pipe_slow); 2250 %} 2251 2252 instruct subD_reg(regD dst, regD src) %{ 2253 predicate((UseSSE>=2) && (UseAVX == 0)); 2254 match(Set dst (SubD dst src)); 2255 2256 format %{ "subsd $dst, $src" %} 2257 ins_cost(150); 2258 ins_encode %{ 2259 __ 
subsd($dst$$XMMRegister, $src$$XMMRegister); 2260 %} 2261 ins_pipe(pipe_slow); 2262 %} 2263 2264 instruct subD_mem(regD dst, memory src) %{ 2265 predicate((UseSSE>=2) && (UseAVX == 0)); 2266 match(Set dst (SubD dst (LoadD src))); 2267 2268 format %{ "subsd $dst, $src" %} 2269 ins_cost(150); 2270 ins_encode %{ 2271 __ subsd($dst$$XMMRegister, $src$$Address); 2272 %} 2273 ins_pipe(pipe_slow); 2274 %} 2275 2276 instruct subD_imm(regD dst, immD con) %{ 2277 predicate((UseSSE>=2) && (UseAVX == 0)); 2278 match(Set dst (SubD dst con)); 2279 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2280 ins_cost(150); 2281 ins_encode %{ 2282 __ subsd($dst$$XMMRegister, $constantaddress($con)); 2283 %} 2284 ins_pipe(pipe_slow); 2285 %} 2286 2287 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 2288 predicate(UseAVX > 0); 2289 match(Set dst (SubD src1 src2)); 2290 2291 format %{ "vsubsd $dst, $src1, $src2" %} 2292 ins_cost(150); 2293 ins_encode %{ 2294 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2295 %} 2296 ins_pipe(pipe_slow); 2297 %} 2298 2299 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 2300 predicate(UseAVX > 0); 2301 match(Set dst (SubD src1 (LoadD src2))); 2302 2303 format %{ "vsubsd $dst, $src1, $src2" %} 2304 ins_cost(150); 2305 ins_encode %{ 2306 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2307 %} 2308 ins_pipe(pipe_slow); 2309 %} 2310 2311 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 2312 predicate(UseAVX > 0); 2313 match(Set dst (SubD src con)); 2314 2315 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2316 ins_cost(150); 2317 ins_encode %{ 2318 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2319 %} 2320 ins_pipe(pipe_slow); 2321 %} 2322 2323 instruct mulF_reg(regF dst, regF src) %{ 2324 predicate((UseSSE>=1) && (UseAVX == 0)); 2325 match(Set dst (MulF dst src)); 2326 2327 format %{ "mulss $dst, $src" %} 2328 ins_cost(150); 2329 ins_encode %{ 2330 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 2331 %} 2332 ins_pipe(pipe_slow); 2333 %} 2334 2335 instruct mulF_mem(regF dst, memory src) %{ 2336 predicate((UseSSE>=1) && (UseAVX == 0)); 2337 match(Set dst (MulF dst (LoadF src))); 2338 2339 format %{ "mulss $dst, $src" %} 2340 ins_cost(150); 2341 ins_encode %{ 2342 __ mulss($dst$$XMMRegister, $src$$Address); 2343 %} 2344 ins_pipe(pipe_slow); 2345 %} 2346 2347 instruct mulF_imm(regF dst, immF con) %{ 2348 predicate((UseSSE>=1) && (UseAVX == 0)); 2349 match(Set dst (MulF dst con)); 2350 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2351 ins_cost(150); 2352 ins_encode %{ 2353 __ mulss($dst$$XMMRegister, $constantaddress($con)); 2354 %} 2355 ins_pipe(pipe_slow); 2356 %} 2357 2358 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 2359 predicate(UseAVX > 0); 2360 match(Set dst (MulF src1 src2)); 2361 2362 format %{ "vmulss $dst, $src1, $src2" %} 2363 ins_cost(150); 2364 ins_encode %{ 2365 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2366 %} 2367 ins_pipe(pipe_slow); 2368 %} 2369 2370 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 2371 predicate(UseAVX > 0); 2372 match(Set dst (MulF src1 (LoadF src2))); 2373 2374 format %{ "vmulss $dst, $src1, $src2" %} 2375 ins_cost(150); 2376 ins_encode %{ 2377 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2378 %} 2379 ins_pipe(pipe_slow); 2380 %} 2381 2382 instruct mulF_reg_imm(regF 
dst, regF src, immF con) %{ 2383 predicate(UseAVX > 0); 2384 match(Set dst (MulF src con)); 2385 2386 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2387 ins_cost(150); 2388 ins_encode %{ 2389 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2390 %} 2391 ins_pipe(pipe_slow); 2392 %} 2393 2394 instruct mulD_reg(regD dst, regD src) %{ 2395 predicate((UseSSE>=2) && (UseAVX == 0)); 2396 match(Set dst (MulD dst src)); 2397 2398 format %{ "mulsd $dst, $src" %} 2399 ins_cost(150); 2400 ins_encode %{ 2401 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 2402 %} 2403 ins_pipe(pipe_slow); 2404 %} 2405 2406 instruct mulD_mem(regD dst, memory src) %{ 2407 predicate((UseSSE>=2) && (UseAVX == 0)); 2408 match(Set dst (MulD dst (LoadD src))); 2409 2410 format %{ "mulsd $dst, $src" %} 2411 ins_cost(150); 2412 ins_encode %{ 2413 __ mulsd($dst$$XMMRegister, $src$$Address); 2414 %} 2415 ins_pipe(pipe_slow); 2416 %} 2417 2418 instruct mulD_imm(regD dst, immD con) %{ 2419 predicate((UseSSE>=2) && (UseAVX == 0)); 2420 match(Set dst (MulD dst con)); 2421 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2422 ins_cost(150); 2423 ins_encode %{ 2424 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 2425 %} 2426 ins_pipe(pipe_slow); 2427 %} 2428 2429 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 2430 predicate(UseAVX > 0); 2431 match(Set dst (MulD src1 src2)); 2432 2433 format %{ "vmulsd $dst, $src1, $src2" %} 2434 ins_cost(150); 2435 ins_encode %{ 2436 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2437 %} 2438 ins_pipe(pipe_slow); 2439 %} 2440 2441 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 2442 predicate(UseAVX > 0); 2443 match(Set dst (MulD src1 (LoadD src2))); 2444 2445 format %{ "vmulsd $dst, $src1, $src2" %} 2446 ins_cost(150); 2447 ins_encode %{ 2448 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2449 %} 2450 ins_pipe(pipe_slow); 2451 %} 2452 2453 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 2454 predicate(UseAVX > 0); 2455 match(Set dst (MulD src con)); 2456 2457 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2458 ins_cost(150); 2459 ins_encode %{ 2460 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2461 %} 2462 ins_pipe(pipe_slow); 2463 %} 2464 2465 instruct divF_reg(regF dst, regF src) %{ 2466 predicate((UseSSE>=1) && (UseAVX == 0)); 2467 match(Set dst (DivF dst src)); 2468 2469 format %{ "divss $dst, $src" %} 2470 ins_cost(150); 2471 ins_encode %{ 2472 __ divss($dst$$XMMRegister, $src$$XMMRegister); 2473 %} 2474 ins_pipe(pipe_slow); 2475 %} 2476 2477 instruct divF_mem(regF dst, memory src) %{ 2478 predicate((UseSSE>=1) && (UseAVX == 0)); 2479 match(Set dst (DivF dst (LoadF src))); 2480 2481 format %{ "divss $dst, $src" %} 2482 ins_cost(150); 2483 ins_encode %{ 2484 __ divss($dst$$XMMRegister, $src$$Address); 2485 %} 2486 ins_pipe(pipe_slow); 2487 %} 2488 2489 instruct divF_imm(regF dst, immF con) %{ 2490 predicate((UseSSE>=1) && (UseAVX == 0)); 2491 match(Set dst (DivF dst con)); 2492 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2493 ins_cost(150); 2494 ins_encode %{ 2495 __ divss($dst$$XMMRegister, $constantaddress($con)); 2496 %} 2497 ins_pipe(pipe_slow); 2498 %} 2499 2500 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 2501 predicate(UseAVX > 0); 2502 match(Set dst (DivF src1 src2)); 2503 2504 
format %{ "vdivss $dst, $src1, $src2" %} 2505 ins_cost(150); 2506 ins_encode %{ 2507 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2508 %} 2509 ins_pipe(pipe_slow); 2510 %} 2511 2512 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 2513 predicate(UseAVX > 0); 2514 match(Set dst (DivF src1 (LoadF src2))); 2515 2516 format %{ "vdivss $dst, $src1, $src2" %} 2517 ins_cost(150); 2518 ins_encode %{ 2519 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2520 %} 2521 ins_pipe(pipe_slow); 2522 %} 2523 2524 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 2525 predicate(UseAVX > 0); 2526 match(Set dst (DivF src con)); 2527 2528 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2529 ins_cost(150); 2530 ins_encode %{ 2531 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2532 %} 2533 ins_pipe(pipe_slow); 2534 %} 2535 2536 instruct divD_reg(regD dst, regD src) %{ 2537 predicate((UseSSE>=2) && (UseAVX == 0)); 2538 match(Set dst (DivD dst src)); 2539 2540 format %{ "divsd $dst, $src" %} 2541 ins_cost(150); 2542 ins_encode %{ 2543 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 2544 %} 2545 ins_pipe(pipe_slow); 2546 %} 2547 2548 instruct divD_mem(regD dst, memory src) %{ 2549 predicate((UseSSE>=2) && (UseAVX == 0)); 2550 match(Set dst (DivD dst (LoadD src))); 2551 2552 format %{ "divsd $dst, $src" %} 2553 ins_cost(150); 2554 ins_encode %{ 2555 __ divsd($dst$$XMMRegister, $src$$Address); 2556 %} 2557 ins_pipe(pipe_slow); 2558 %} 2559 2560 instruct divD_imm(regD dst, immD con) %{ 2561 predicate((UseSSE>=2) && (UseAVX == 0)); 2562 match(Set dst (DivD dst con)); 2563 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2564 ins_cost(150); 2565 ins_encode %{ 2566 __ divsd($dst$$XMMRegister, $constantaddress($con)); 2567 %} 2568 ins_pipe(pipe_slow); 2569 %} 2570 2571 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 2572 predicate(UseAVX > 0); 2573 match(Set dst (DivD src1 src2)); 2574 2575 format %{ "vdivsd $dst, $src1, $src2" %} 2576 ins_cost(150); 2577 ins_encode %{ 2578 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2579 %} 2580 ins_pipe(pipe_slow); 2581 %} 2582 2583 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 2584 predicate(UseAVX > 0); 2585 match(Set dst (DivD src1 (LoadD src2))); 2586 2587 format %{ "vdivsd $dst, $src1, $src2" %} 2588 ins_cost(150); 2589 ins_encode %{ 2590 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2591 %} 2592 ins_pipe(pipe_slow); 2593 %} 2594 2595 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 2596 predicate(UseAVX > 0); 2597 match(Set dst (DivD src con)); 2598 2599 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2600 ins_cost(150); 2601 ins_encode %{ 2602 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2603 %} 2604 ins_pipe(pipe_slow); 2605 %} 2606 2607 instruct absF_reg(regF dst) %{ 2608 predicate((UseSSE>=1) && (UseAVX == 0)); 2609 match(Set dst (AbsF dst)); 2610 ins_cost(150); 2611 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} 2612 ins_encode %{ 2613 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 2614 %} 2615 ins_pipe(pipe_slow); 2616 %} 2617 2618 instruct absF_reg_reg(regF dst, regF src) %{ 2619 predicate(UseAVX > 0); 2620 match(Set dst (AbsF src)); 2621 ins_cost(150); 2622 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign 
masking" %} 2623 ins_encode %{ 2624 int vector_len = 0; 2625 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 2626 ExternalAddress(float_signmask()), vector_len); 2627 %} 2628 ins_pipe(pipe_slow); 2629 %} 2630 2631 instruct absD_reg(regD dst) %{ 2632 predicate((UseSSE>=2) && (UseAVX == 0)); 2633 match(Set dst (AbsD dst)); 2634 ins_cost(150); 2635 format %{ "andpd $dst, [0x7fffffffffffffff]\t" 2636 "# abs double by sign masking" %} 2637 ins_encode %{ 2638 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); 2639 %} 2640 ins_pipe(pipe_slow); 2641 %} 2642 2643 instruct absD_reg_reg(regD dst, regD src) %{ 2644 predicate(UseAVX > 0); 2645 match(Set dst (AbsD src)); 2646 ins_cost(150); 2647 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 2648 "# abs double by sign masking" %} 2649 ins_encode %{ 2650 int vector_len = 0; 2651 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 2652 ExternalAddress(double_signmask()), vector_len); 2653 %} 2654 ins_pipe(pipe_slow); 2655 %} 2656 2657 instruct negF_reg(regF dst) %{ 2658 predicate((UseSSE>=1) && (UseAVX == 0)); 2659 match(Set dst (NegF dst)); 2660 ins_cost(150); 2661 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 2662 ins_encode %{ 2663 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 2664 %} 2665 ins_pipe(pipe_slow); 2666 %} 2667 2668 instruct negF_reg_reg(regF dst, regF src) %{ 2669 predicate(UseAVX > 0); 2670 match(Set dst (NegF src)); 2671 ins_cost(150); 2672 format %{ "vxorps $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 2673 ins_encode %{ 2674 int vector_len = 0; 2675 __ vxorps($dst$$XMMRegister, $src$$XMMRegister, 2676 ExternalAddress(float_signflip()), vector_len); 2677 %} 2678 ins_pipe(pipe_slow); 2679 %} 2680 2681 instruct negD_reg(regD dst) %{ 2682 predicate((UseSSE>=2) && (UseAVX == 0)); 2683 match(Set dst (NegD dst)); 2684 ins_cost(150); 2685 format %{ "xorpd $dst, [0x8000000000000000]\t" 2686 "# neg double by sign flipping" %} 2687 ins_encode %{ 2688 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 2689 %} 2690 ins_pipe(pipe_slow); 2691 %} 2692 2693 instruct negD_reg_reg(regD dst, regD src) %{ 2694 predicate(UseAVX > 0); 2695 match(Set dst (NegD src)); 2696 ins_cost(150); 2697 format %{ "vxorpd $dst, $src, [0x8000000000000000]\t" 2698 "# neg double by sign flipping" %} 2699 ins_encode %{ 2700 int vector_len = 0; 2701 __ vxorpd($dst$$XMMRegister, $src$$XMMRegister, 2702 ExternalAddress(double_signflip()), vector_len); 2703 %} 2704 ins_pipe(pipe_slow); 2705 %} 2706 2707 instruct sqrtF_reg(regF dst, regF src) %{ 2708 predicate(UseSSE>=1); 2709 match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); 2710 2711 format %{ "sqrtss $dst, $src" %} 2712 ins_cost(150); 2713 ins_encode %{ 2714 __ sqrtss($dst$$XMMRegister, $src$$XMMRegister); 2715 %} 2716 ins_pipe(pipe_slow); 2717 %} 2718 2719 instruct sqrtF_mem(regF dst, memory src) %{ 2720 predicate(UseSSE>=1); 2721 match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src))))); 2722 2723 format %{ "sqrtss $dst, $src" %} 2724 ins_cost(150); 2725 ins_encode %{ 2726 __ sqrtss($dst$$XMMRegister, $src$$Address); 2727 %} 2728 ins_pipe(pipe_slow); 2729 %} 2730 2731 instruct sqrtF_imm(regF dst, immF con) %{ 2732 predicate(UseSSE>=1); 2733 match(Set dst (ConvD2F (SqrtD (ConvF2D con)))); 2734 format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2735 ins_cost(150); 2736 ins_encode %{ 2737 __ sqrtss($dst$$XMMRegister, $constantaddress($con)); 2738 %} 2739 ins_pipe(pipe_slow); 2740 %} 2741 2742 instruct 
sqrtD_reg(regD dst, regD src) %{ 2743 predicate(UseSSE>=2); 2744 match(Set dst (SqrtD src)); 2745 2746 format %{ "sqrtsd $dst, $src" %} 2747 ins_cost(150); 2748 ins_encode %{ 2749 __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister); 2750 %} 2751 ins_pipe(pipe_slow); 2752 %} 2753 2754 instruct sqrtD_mem(regD dst, memory src) %{ 2755 predicate(UseSSE>=2); 2756 match(Set dst (SqrtD (LoadD src))); 2757 2758 format %{ "sqrtsd $dst, $src" %} 2759 ins_cost(150); 2760 ins_encode %{ 2761 __ sqrtsd($dst$$XMMRegister, $src$$Address); 2762 %} 2763 ins_pipe(pipe_slow); 2764 %} 2765 2766 instruct sqrtD_imm(regD dst, immD con) %{ 2767 predicate(UseSSE>=2); 2768 match(Set dst (SqrtD con)); 2769 format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2770 ins_cost(150); 2771 ins_encode %{ 2772 __ sqrtsd($dst$$XMMRegister, $constantaddress($con)); 2773 %} 2774 ins_pipe(pipe_slow); 2775 %} 2776 2777 // ====================VECTOR INSTRUCTIONS===================================== 2778 2779 // Load vectors (4 bytes long) 2780 instruct loadV4(vecS dst, memory mem) %{ 2781 predicate(n->as_LoadVector()->memory_size() == 4); 2782 match(Set dst (LoadVector mem)); 2783 ins_cost(125); 2784 format %{ "movd $dst,$mem\t! load vector (4 bytes)" %} 2785 ins_encode %{ 2786 __ movdl($dst$$XMMRegister, $mem$$Address); 2787 %} 2788 ins_pipe( pipe_slow ); 2789 %} 2790 2791 // Load vectors (8 bytes long) 2792 instruct loadV8(vecD dst, memory mem) %{ 2793 predicate(n->as_LoadVector()->memory_size() == 8); 2794 match(Set dst (LoadVector mem)); 2795 ins_cost(125); 2796 format %{ "movq $dst,$mem\t! load vector (8 bytes)" %} 2797 ins_encode %{ 2798 __ movq($dst$$XMMRegister, $mem$$Address); 2799 %} 2800 ins_pipe( pipe_slow ); 2801 %} 2802 2803 // Load vectors (16 bytes long) 2804 instruct loadV16(vecX dst, memory mem) %{ 2805 predicate(n->as_LoadVector()->memory_size() == 16); 2806 match(Set dst (LoadVector mem)); 2807 ins_cost(125); 2808 format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %} 2809 ins_encode %{ 2810 __ movdqu($dst$$XMMRegister, $mem$$Address); 2811 %} 2812 ins_pipe( pipe_slow ); 2813 %} 2814 2815 // Load vectors (32 bytes long) 2816 instruct loadV32(vecY dst, memory mem) %{ 2817 predicate(n->as_LoadVector()->memory_size() == 32); 2818 match(Set dst (LoadVector mem)); 2819 ins_cost(125); 2820 format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %} 2821 ins_encode %{ 2822 __ vmovdqu($dst$$XMMRegister, $mem$$Address); 2823 %} 2824 ins_pipe( pipe_slow ); 2825 %} 2826 2827 // Load vectors (64 bytes long) 2828 instruct loadV64(vecZ dst, memory mem) %{ 2829 predicate(n->as_LoadVector()->memory_size() == 64); 2830 match(Set dst (LoadVector mem)); 2831 ins_cost(125); 2832 format %{ "vmovdqu $dst k0,$mem\t! load vector (64 bytes)" %} 2833 ins_encode %{ 2834 int vector_len = 2; 2835 __ evmovdqu($dst$$XMMRegister, $mem$$Address, vector_len); 2836 %} 2837 ins_pipe( pipe_slow ); 2838 %} 2839 2840 // Store vectors 2841 instruct storeV4(memory mem, vecS src) %{ 2842 predicate(n->as_StoreVector()->memory_size() == 4); 2843 match(Set mem (StoreVector mem src)); 2844 ins_cost(145); 2845 format %{ "movd $mem,$src\t! store vector (4 bytes)" %} 2846 ins_encode %{ 2847 __ movdl($mem$$Address, $src$$XMMRegister); 2848 %} 2849 ins_pipe( pipe_slow ); 2850 %} 2851 2852 instruct storeV8(memory mem, vecD src) %{ 2853 predicate(n->as_StoreVector()->memory_size() == 8); 2854 match(Set mem (StoreVector mem src)); 2855 ins_cost(145); 2856 format %{ "movq $mem,$src\t! 
store vector (8 bytes)" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV16(memory mem, vecX src) %{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %}
  ins_encode %{
    __ movdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV32(memory mem, vecY src) %{
  predicate(n->as_StoreVector()->memory_size() == 32);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV64(memory mem, vecZ src) %{
  predicate(n->as_StoreVector()->memory_size() == 64);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdqu $mem k0,$src\t! store vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdqu($mem$$Address, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate byte scalar to be vector
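// Note on the replicate-from-register forms below: the scalar is widened
// in place by repeated unpacking. movdl puts the 32-bit scalar into the
// low dword, punpcklbw doubles each byte, pshuflw/pshufd broadcast across
// the low lane, punpcklqdq fills the 128-bit register, and vinserti128h /
// vinserti64x4h copy the low lane upward to build the 256-bit and 512-bit
// forms.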
replicate32B" %} 2950 ins_encode %{ 2951 __ movdl($dst$$XMMRegister, $src$$Register); 2952 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 2953 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2954 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2955 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2956 %} 2957 ins_pipe( pipe_slow ); 2958 %} 2959 2960 instruct Repl64B(vecZ dst, rRegI src) %{ 2961 predicate(n->as_Vector()->length() == 64); 2962 match(Set dst (ReplicateB src)); 2963 format %{ "movd $dst,$src\n\t" 2964 "punpcklbw $dst,$dst\n\t" 2965 "pshuflw $dst,$dst,0x00\n\t" 2966 "punpcklqdq $dst,$dst\n\t" 2967 "vinserti128h $dst,$dst,$dst\t! lower replicate32B\n\t" 2968 "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate632B" %} 2969 ins_encode %{ 2970 __ movdl($dst$$XMMRegister, $src$$Register); 2971 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 2972 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2973 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2974 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2975 __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2976 %} 2977 ins_pipe( pipe_slow ); 2978 %} 2979 2980 // Replicate byte scalar immediate to be vector by loading from const table. 2981 instruct Repl4B_imm(vecS dst, immI con) %{ 2982 predicate(n->as_Vector()->length() == 4); 2983 match(Set dst (ReplicateB con)); 2984 format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %} 2985 ins_encode %{ 2986 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1))); 2987 %} 2988 ins_pipe( pipe_slow ); 2989 %} 2990 2991 instruct Repl8B_imm(vecD dst, immI con) %{ 2992 predicate(n->as_Vector()->length() == 8); 2993 match(Set dst (ReplicateB con)); 2994 format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %} 2995 ins_encode %{ 2996 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 2997 %} 2998 ins_pipe( pipe_slow ); 2999 %} 3000 3001 instruct Repl16B_imm(vecX dst, immI con) %{ 3002 predicate(n->as_Vector()->length() == 16); 3003 match(Set dst (ReplicateB con)); 3004 format %{ "movq $dst,[$constantaddress]\n\t" 3005 "punpcklqdq $dst,$dst\t! replicate16B($con)" %} 3006 ins_encode %{ 3007 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3008 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3009 %} 3010 ins_pipe( pipe_slow ); 3011 %} 3012 3013 instruct Repl32B_imm(vecY dst, immI con) %{ 3014 predicate(n->as_Vector()->length() == 32); 3015 match(Set dst (ReplicateB con)); 3016 format %{ "movq $dst,[$constantaddress]\n\t" 3017 "punpcklqdq $dst,$dst\n\t" 3018 "vinserti128h $dst,$dst,$dst\t! lreplicate32B($con)" %} 3019 ins_encode %{ 3020 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3021 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3022 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3023 %} 3024 ins_pipe( pipe_slow ); 3025 %} 3026 3027 instruct Repl64B_imm(vecZ dst, immI con) %{ 3028 predicate(n->as_Vector()->length() == 64); 3029 match(Set dst (ReplicateB con)); 3030 format %{ "movq $dst,[$constantaddress]\n\t" 3031 "punpcklqdq $dst,$dst\n\t" 3032 "vinserti128h $dst,$dst,$dst\t! lower replicate32B($con)\n\t" 3033 "vinserti64x4h $dst k0,$dst,$dst\t! 

// Replicate byte scalar zero to be vector
instruct Repl4B_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate4B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8B_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate8B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16B_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate16B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl32B_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl64B_zero(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 64);
  match(Set dst (ReplicateB zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate64B zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate char/short (2 byte) scalar to be vector
instruct Repl2S(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate2S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8S(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\t!
replicate8S" %} 3129 ins_encode %{ 3130 __ movdl($dst$$XMMRegister, $src$$Register); 3131 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3132 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3133 %} 3134 ins_pipe( pipe_slow ); 3135 %} 3136 3137 instruct Repl16S(vecY dst, rRegI src) %{ 3138 predicate(n->as_Vector()->length() == 16); 3139 match(Set dst (ReplicateS src)); 3140 format %{ "movd $dst,$src\n\t" 3141 "pshuflw $dst,$dst,0x00\n\t" 3142 "punpcklqdq $dst,$dst\n\t" 3143 "vinserti128h $dst,$dst,$dst\t! replicate16S" %} 3144 ins_encode %{ 3145 __ movdl($dst$$XMMRegister, $src$$Register); 3146 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3147 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3148 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3149 %} 3150 ins_pipe( pipe_slow ); 3151 %} 3152 3153 instruct Repl32S(vecZ dst, rRegI src) %{ 3154 predicate(n->as_Vector()->length() == 32); 3155 match(Set dst (ReplicateS src)); 3156 format %{ "movd $dst,$src\n\t" 3157 "pshuflw $dst,$dst,0x00\n\t" 3158 "punpcklqdq $dst,$dst\n\t" 3159 "vinserti128h $dst,$dst,$dst\t! lower replicate16S\n\t" 3160 "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate16S" %} 3161 ins_encode %{ 3162 __ movdl($dst$$XMMRegister, $src$$Register); 3163 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3164 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3165 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3166 __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3167 %} 3168 ins_pipe( pipe_slow ); 3169 %} 3170 3171 // Replicate char/short (2 byte) scalar immediate to be vector by loading from const table. 3172 instruct Repl2S_imm(vecS dst, immI con) %{ 3173 predicate(n->as_Vector()->length() == 2); 3174 match(Set dst (ReplicateS con)); 3175 format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %} 3176 ins_encode %{ 3177 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2))); 3178 %} 3179 ins_pipe( fpu_reg_reg ); 3180 %} 3181 3182 instruct Repl4S_imm(vecD dst, immI con) %{ 3183 predicate(n->as_Vector()->length() == 4); 3184 match(Set dst (ReplicateS con)); 3185 format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %} 3186 ins_encode %{ 3187 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3188 %} 3189 ins_pipe( fpu_reg_reg ); 3190 %} 3191 3192 instruct Repl8S_imm(vecX dst, immI con) %{ 3193 predicate(n->as_Vector()->length() == 8); 3194 match(Set dst (ReplicateS con)); 3195 format %{ "movq $dst,[$constantaddress]\n\t" 3196 "punpcklqdq $dst,$dst\t! replicate8S($con)" %} 3197 ins_encode %{ 3198 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3199 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3200 %} 3201 ins_pipe( pipe_slow ); 3202 %} 3203 3204 instruct Repl16S_imm(vecY dst, immI con) %{ 3205 predicate(n->as_Vector()->length() == 16); 3206 match(Set dst (ReplicateS con)); 3207 format %{ "movq $dst,[$constantaddress]\n\t" 3208 "punpcklqdq $dst,$dst\n\t" 3209 "vinserti128h $dst,$dst,$dst\t! 
replicate16S($con)" %} 3210 ins_encode %{ 3211 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3212 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3213 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3214 %} 3215 ins_pipe( pipe_slow ); 3216 %} 3217 3218 instruct Repl32S_imm(vecZ dst, immI con) %{ 3219 predicate(n->as_Vector()->length() == 32); 3220 match(Set dst (ReplicateS con)); 3221 format %{ "movq $dst,[$constantaddress]\n\t" 3222 "punpcklqdq $dst,$dst\n\t" 3223 "vinserti128h $dst,$dst,$dst\t! lower replicate16S($con)\n\t" 3224 "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate16S($con)" %} 3225 ins_encode %{ 3226 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3227 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3228 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3229 __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3230 %} 3231 ins_pipe( pipe_slow ); 3232 %} 3233 3234 // Replicate char/short (2 byte) scalar zero to be vector 3235 instruct Repl2S_zero(vecS dst, immI0 zero) %{ 3236 predicate(n->as_Vector()->length() == 2); 3237 match(Set dst (ReplicateS zero)); 3238 format %{ "pxor $dst,$dst\t! replicate2S zero" %} 3239 ins_encode %{ 3240 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3241 %} 3242 ins_pipe( fpu_reg_reg ); 3243 %} 3244 3245 instruct Repl4S_zero(vecD dst, immI0 zero) %{ 3246 predicate(n->as_Vector()->length() == 4); 3247 match(Set dst (ReplicateS zero)); 3248 format %{ "pxor $dst,$dst\t! replicate4S zero" %} 3249 ins_encode %{ 3250 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3251 %} 3252 ins_pipe( fpu_reg_reg ); 3253 %} 3254 3255 instruct Repl8S_zero(vecX dst, immI0 zero) %{ 3256 predicate(n->as_Vector()->length() == 8); 3257 match(Set dst (ReplicateS zero)); 3258 format %{ "pxor $dst,$dst\t! replicate8S zero" %} 3259 ins_encode %{ 3260 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3261 %} 3262 ins_pipe( fpu_reg_reg ); 3263 %} 3264 3265 instruct Repl16S_zero(vecY dst, immI0 zero) %{ 3266 predicate(n->as_Vector()->length() == 16); 3267 match(Set dst (ReplicateS zero)); 3268 format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %} 3269 ins_encode %{ 3270 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 3271 int vector_len = 1; 3272 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3273 %} 3274 ins_pipe( fpu_reg_reg ); 3275 %} 3276 3277 instruct Repl32S_zero(vecZ dst, immI0 zero) %{ 3278 predicate(n->as_Vector()->length() == 32); 3279 match(Set dst (ReplicateS zero)); 3280 format %{ "vpxor $dst k0,$dst,$dst\t! replicate32S zero" %} 3281 ins_encode %{ 3282 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 3283 int vector_len = 2; 3284 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3285 %} 3286 ins_pipe( fpu_reg_reg ); 3287 %} 3288 3289 // Replicate integer (4 byte) scalar to be vector 3290 instruct Repl2I(vecD dst, rRegI src) %{ 3291 predicate(n->as_Vector()->length() == 2); 3292 match(Set dst (ReplicateI src)); 3293 format %{ "movd $dst,$src\n\t" 3294 "pshufd $dst,$dst,0x00\t! 
replicate2I" %} 3295 ins_encode %{ 3296 __ movdl($dst$$XMMRegister, $src$$Register); 3297 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3298 %} 3299 ins_pipe( fpu_reg_reg ); 3300 %} 3301 3302 instruct Repl4I(vecX dst, rRegI src) %{ 3303 predicate(n->as_Vector()->length() == 4); 3304 match(Set dst (ReplicateI src)); 3305 format %{ "movd $dst,$src\n\t" 3306 "pshufd $dst,$dst,0x00\t! replicate4I" %} 3307 ins_encode %{ 3308 __ movdl($dst$$XMMRegister, $src$$Register); 3309 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3310 %} 3311 ins_pipe( pipe_slow ); 3312 %} 3313 3314 instruct Repl8I(vecY dst, rRegI src) %{ 3315 predicate(n->as_Vector()->length() == 8); 3316 match(Set dst (ReplicateI src)); 3317 format %{ "movd $dst,$src\n\t" 3318 "pshufd $dst,$dst,0x00\n\t" 3319 "vinserti128h $dst,$dst,$dst\t! replicate8I" %} 3320 ins_encode %{ 3321 __ movdl($dst$$XMMRegister, $src$$Register); 3322 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3323 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3324 %} 3325 ins_pipe( pipe_slow ); 3326 %} 3327 3328 instruct Repl16I(vecZ dst, rRegI src) %{ 3329 predicate(n->as_Vector()->length() == 16); 3330 match(Set dst (ReplicateI src)); 3331 format %{ "movd $dst,$src\n\t" 3332 "pshufd $dst,$dst,0x00\n\t" 3333 "vinserti128h $dst,$dst,$dst\t! lower replicate8I\n\t" 3334 "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate8I" %} 3335 ins_encode %{ 3336 __ movdl($dst$$XMMRegister, $src$$Register); 3337 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3338 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3339 __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3340 %} 3341 ins_pipe( pipe_slow ); 3342 %} 3343 3344 // Replicate integer (4 byte) scalar immediate to be vector by loading from const table. 3345 instruct Repl2I_imm(vecD dst, immI con) %{ 3346 predicate(n->as_Vector()->length() == 2); 3347 match(Set dst (ReplicateI con)); 3348 format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %} 3349 ins_encode %{ 3350 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3351 %} 3352 ins_pipe( fpu_reg_reg ); 3353 %} 3354 3355 instruct Repl4I_imm(vecX dst, immI con) %{ 3356 predicate(n->as_Vector()->length() == 4); 3357 match(Set dst (ReplicateI con)); 3358 format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t" 3359 "punpcklqdq $dst,$dst" %} 3360 ins_encode %{ 3361 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3362 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3363 %} 3364 ins_pipe( pipe_slow ); 3365 %} 3366 3367 instruct Repl8I_imm(vecY dst, immI con) %{ 3368 predicate(n->as_Vector()->length() == 8); 3369 match(Set dst (ReplicateI con)); 3370 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 3371 "punpcklqdq $dst,$dst\n\t" 3372 "vinserti128h $dst,$dst,$dst" %} 3373 ins_encode %{ 3374 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3375 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3376 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3377 %} 3378 ins_pipe( pipe_slow ); 3379 %} 3380 3381 instruct Repl16I_imm(vecZ dst, immI con) %{ 3382 predicate(n->as_Vector()->length() == 16); 3383 match(Set dst (ReplicateI con)); 3384 format %{ "movq $dst,[$constantaddress]\t! 
instruct Repl2I_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\t! replicate4I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_mem(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\n\t"
            "vinserti128h $dst,$dst,$dst\t! lower replicate8I\n\t"
            "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate8I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate integer (4 byte) scalar zero to be vector
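// pxor of a register with itself is the standard SSE zeroing idiom; the
// result does not depend on the old register contents. Note that these
// forms use the cheap fpu_reg_reg pipe rather than pipe_slow.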
replicate8I zero" %} 3477 ins_encode %{ 3478 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 3479 int vector_len = 1; 3480 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3481 %} 3482 ins_pipe( fpu_reg_reg ); 3483 %} 3484 3485 instruct Repl16I_zero(vecZ dst, immI0 zero) %{ 3486 predicate(n->as_Vector()->length() == 16); 3487 match(Set dst (ReplicateI zero)); 3488 format %{ "vpxor $dst k0,$dst,$dst\t! replicate16I zero" %} 3489 ins_encode %{ 3490 // Use vxorpd since AVX does not have vpxor for 512-bit (AVX2 will have it). 3491 int vector_len = 2; 3492 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3493 %} 3494 ins_pipe( fpu_reg_reg ); 3495 %} 3496 3497 // Replicate long (8 byte) scalar to be vector 3498 #ifdef _LP64 3499 instruct Repl2L(vecX dst, rRegL src) %{ 3500 predicate(n->as_Vector()->length() == 2); 3501 match(Set dst (ReplicateL src)); 3502 format %{ "movdq $dst,$src\n\t" 3503 "punpcklqdq $dst,$dst\t! replicate2L" %} 3504 ins_encode %{ 3505 __ movdq($dst$$XMMRegister, $src$$Register); 3506 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3507 %} 3508 ins_pipe( pipe_slow ); 3509 %} 3510 3511 instruct Repl4L(vecY dst, rRegL src) %{ 3512 predicate(n->as_Vector()->length() == 4); 3513 match(Set dst (ReplicateL src)); 3514 format %{ "movdq $dst,$src\n\t" 3515 "punpcklqdq $dst,$dst\n\t" 3516 "vinserti128h $dst,$dst,$dst\t! replicate4L" %} 3517 ins_encode %{ 3518 __ movdq($dst$$XMMRegister, $src$$Register); 3519 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3520 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3521 %} 3522 ins_pipe( pipe_slow ); 3523 %} 3524 3525 instruct Repl8L(vecZ dst, rRegL src) %{ 3526 predicate(n->as_Vector()->length() == 8); 3527 match(Set dst (ReplicateL src)); 3528 format %{ "movdq $dst,$src\n\t" 3529 "punpcklqdq $dst,$dst\n\t" 3530 "vinserti128h $dst,$dst,$dst\t! lower replicate4L\n\t" 3531 "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate4L" %} 3532 ins_encode %{ 3533 __ movdq($dst$$XMMRegister, $src$$Register); 3534 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3535 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3536 __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3537 %} 3538 ins_pipe( pipe_slow ); 3539 %} 3540 #else // _LP64 3541 instruct Repl2L(vecX dst, eRegL src, regD tmp) %{ 3542 predicate(n->as_Vector()->length() == 2); 3543 match(Set dst (ReplicateL src)); 3544 effect(TEMP dst, USE src, TEMP tmp); 3545 format %{ "movdl $dst,$src.lo\n\t" 3546 "movdl $tmp,$src.hi\n\t" 3547 "punpckldq $dst,$tmp\n\t" 3548 "punpcklqdq $dst,$dst\t! replicate2L"%} 3549 ins_encode %{ 3550 __ movdl($dst$$XMMRegister, $src$$Register); 3551 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3552 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3553 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3554 %} 3555 ins_pipe( pipe_slow ); 3556 %} 3557 3558 instruct Repl4L(vecY dst, eRegL src, regD tmp) %{ 3559 predicate(n->as_Vector()->length() == 4); 3560 match(Set dst (ReplicateL src)); 3561 effect(TEMP dst, USE src, TEMP tmp); 3562 format %{ "movdl $dst,$src.lo\n\t" 3563 "movdl $tmp,$src.hi\n\t" 3564 "punpckldq $dst,$tmp\n\t" 3565 "punpcklqdq $dst,$dst\n\t" 3566 "vinserti128h $dst,$dst,$dst\t! 
replicate4L" %} 3567 ins_encode %{ 3568 __ movdl($dst$$XMMRegister, $src$$Register); 3569 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3570 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3571 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3572 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3573 %} 3574 ins_pipe( pipe_slow ); 3575 %} 3576 3577 instruct Repl8L(vecZ dst, eRegL src, regD tmp) %{ 3578 predicate(n->as_Vector()->length() == 4); 3579 match(Set dst (ReplicateL src)); 3580 effect(TEMP dst, USE src, TEMP tmp); 3581 format %{ "movdl $dst,$src.lo\n\t" 3582 "movdl $tmp,$src.hi\n\t" 3583 "punpckldq $dst,$tmp\n\t" 3584 "vinserti128h $dst,$dst,$dst\t! lower replicate4L\n\t" 3585 "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate4L" %} 3586 ins_encode %{ 3587 __ movdl($dst$$XMMRegister, $src$$Register); 3588 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3589 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3590 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3591 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3592 __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3593 %} 3594 ins_pipe( pipe_slow ); 3595 %} 3596 #endif // _LP64 3597 3598 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. 3599 instruct Repl2L_imm(vecX dst, immL con) %{ 3600 predicate(n->as_Vector()->length() == 2); 3601 match(Set dst (ReplicateL con)); 3602 format %{ "movq $dst,[$constantaddress]\n\t" 3603 "punpcklqdq $dst,$dst\t! replicate2L($con)" %} 3604 ins_encode %{ 3605 __ movq($dst$$XMMRegister, $constantaddress($con)); 3606 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3607 %} 3608 ins_pipe( pipe_slow ); 3609 %} 3610 3611 instruct Repl4L_imm(vecY dst, immL con) %{ 3612 predicate(n->as_Vector()->length() == 4); 3613 match(Set dst (ReplicateL con)); 3614 format %{ "movq $dst,[$constantaddress]\n\t" 3615 "punpcklqdq $dst,$dst\n\t" 3616 "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %} 3617 ins_encode %{ 3618 __ movq($dst$$XMMRegister, $constantaddress($con)); 3619 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3620 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3621 %} 3622 ins_pipe( pipe_slow ); 3623 %} 3624 3625 instruct Repl8L_imm(vecZ dst, immL con) %{ 3626 predicate(n->as_Vector()->length() == 8); 3627 match(Set dst (ReplicateL con)); 3628 format %{ "movq $dst,[$constantaddress]\n\t" 3629 "punpcklqdq $dst,$dst\n\t" 3630 "vinserti128h $dst,$dst,$dst\t! lower replicate4L($con)\n\t" 3631 "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate4L($con)" %} 3632 ins_encode %{ 3633 __ movq($dst$$XMMRegister, $constantaddress($con)); 3634 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3635 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3636 __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3637 %} 3638 ins_pipe( pipe_slow ); 3639 %} 3640 3641 // Long could be loaded into xmm register directly from memory. 3642 instruct Repl2L_mem(vecX dst, memory mem) %{ 3643 predicate(n->as_Vector()->length() == 2); 3644 match(Set dst (ReplicateL (LoadL mem))); 3645 format %{ "movq $dst,$mem\n\t" 3646 "punpcklqdq $dst,$dst\t! 
replicate2L" %} 3647 ins_encode %{ 3648 __ movq($dst$$XMMRegister, $mem$$Address); 3649 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3650 %} 3651 ins_pipe( pipe_slow ); 3652 %} 3653 3654 instruct Repl4L_mem(vecY dst, memory mem) %{ 3655 predicate(n->as_Vector()->length() == 4); 3656 match(Set dst (ReplicateL (LoadL mem))); 3657 format %{ "movq $dst,$mem\n\t" 3658 "punpcklqdq $dst,$dst\n\t" 3659 "vinserti128h $dst,$dst,$dst\t! replicate4L" %} 3660 ins_encode %{ 3661 __ movq($dst$$XMMRegister, $mem$$Address); 3662 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3663 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3664 %} 3665 ins_pipe( pipe_slow ); 3666 %} 3667 3668 instruct Repl8L_mem(vecZ dst, memory mem) %{ 3669 predicate(n->as_Vector()->length() == 8); 3670 match(Set dst (ReplicateL (LoadL mem))); 3671 format %{ "movq $dst,$mem\n\t" 3672 "punpcklqdq $dst,$dst\n\t" 3673 "vinserti128h $dst,$dst,$dst\t! lower replicate4L\n\t" 3674 "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate4L" %} 3675 ins_encode %{ 3676 __ movq($dst$$XMMRegister, $mem$$Address); 3677 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3678 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3679 __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3680 %} 3681 ins_pipe( pipe_slow ); 3682 %} 3683 3684 // Replicate long (8 byte) scalar zero to be vector 3685 instruct Repl2L_zero(vecX dst, immL0 zero) %{ 3686 predicate(n->as_Vector()->length() == 2); 3687 match(Set dst (ReplicateL zero)); 3688 format %{ "pxor $dst,$dst\t! replicate2L zero" %} 3689 ins_encode %{ 3690 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3691 %} 3692 ins_pipe( fpu_reg_reg ); 3693 %} 3694 3695 instruct Repl4L_zero(vecY dst, immL0 zero) %{ 3696 predicate(n->as_Vector()->length() == 4); 3697 match(Set dst (ReplicateL zero)); 3698 format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %} 3699 ins_encode %{ 3700 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 3701 int vector_len = 1; 3702 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3703 %} 3704 ins_pipe( fpu_reg_reg ); 3705 %} 3706 3707 instruct Repl8L_zero(vecZ dst, immL0 zero) %{ 3708 predicate(n->as_Vector()->length() == 8); 3709 match(Set dst (ReplicateL zero)); 3710 format %{ "vpxor $dst k0,$dst,$dst\t! replicate8L zero" %} 3711 ins_encode %{ 3712 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 3713 int vector_len = 2; 3714 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3715 %} 3716 ins_pipe( fpu_reg_reg ); 3717 %} 3718 3719 // Replicate float (4 byte) scalar to be vector 3720 instruct Repl2F(vecD dst, regF src) %{ 3721 predicate(n->as_Vector()->length() == 2); 3722 match(Set dst (ReplicateF src)); 3723 format %{ "pshufd $dst,$dst,0x00\t! replicate2F" %} 3724 ins_encode %{ 3725 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3726 %} 3727 ins_pipe( fpu_reg_reg ); 3728 %} 3729 3730 instruct Repl4F(vecX dst, regF src) %{ 3731 predicate(n->as_Vector()->length() == 4); 3732 match(Set dst (ReplicateF src)); 3733 format %{ "pshufd $dst,$dst,0x00\t! 
replicate4F" %} 3734 ins_encode %{ 3735 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3736 %} 3737 ins_pipe( pipe_slow ); 3738 %} 3739 3740 instruct Repl8F(vecY dst, regF src) %{ 3741 predicate(n->as_Vector()->length() == 8); 3742 match(Set dst (ReplicateF src)); 3743 format %{ "pshufd $dst,$src,0x00\n\t" 3744 "vinsertf128h $dst,$dst,$dst\t! replicate8F" %} 3745 ins_encode %{ 3746 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3747 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3748 %} 3749 ins_pipe( pipe_slow ); 3750 %} 3751 3752 instruct Repl16F(vecZ dst, regF src) %{ 3753 predicate(n->as_Vector()->length() == 16); 3754 match(Set dst (ReplicateF src)); 3755 format %{ "pshufd $dst,$src,0x00\n\t" 3756 "vinsertf128h $dst,$dst,$dst\t! lower replicate8F\n\t" 3757 "vinsertf64x4h $dst k0,$dst,$dst\t! lower replicate8F" %} 3758 ins_encode %{ 3759 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3760 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3761 __ vinsertf64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3762 %} 3763 ins_pipe( pipe_slow ); 3764 %} 3765 3766 // Replicate float (4 byte) scalar zero to be vector 3767 instruct Repl2F_zero(vecD dst, immF0 zero) %{ 3768 predicate(n->as_Vector()->length() == 2); 3769 match(Set dst (ReplicateF zero)); 3770 format %{ "xorps $dst,$dst\t! replicate2F zero" %} 3771 ins_encode %{ 3772 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 3773 %} 3774 ins_pipe( fpu_reg_reg ); 3775 %} 3776 3777 instruct Repl4F_zero(vecX dst, immF0 zero) %{ 3778 predicate(n->as_Vector()->length() == 4); 3779 match(Set dst (ReplicateF zero)); 3780 format %{ "xorps $dst,$dst\t! replicate4F zero" %} 3781 ins_encode %{ 3782 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 3783 %} 3784 ins_pipe( fpu_reg_reg ); 3785 %} 3786 3787 instruct Repl8F_zero(vecY dst, immF0 zero) %{ 3788 predicate(n->as_Vector()->length() == 8); 3789 match(Set dst (ReplicateF zero)); 3790 format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %} 3791 ins_encode %{ 3792 int vector_len = 1; 3793 __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3794 %} 3795 ins_pipe( fpu_reg_reg ); 3796 %} 3797 3798 instruct Repl16F_zero(vecZ dst, immF0 zero) %{ 3799 predicate(n->as_Vector()->length() == 16); 3800 match(Set dst (ReplicateF zero)); 3801 format %{ "vxorps $dst k0,$dst,$dst\t! replicate16F zero" %} 3802 ins_encode %{ 3803 int vector_len = 2; 3804 __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3805 %} 3806 ins_pipe( fpu_reg_reg ); 3807 %} 3808 3809 // Replicate double (8 bytes) scalar to be vector 3810 instruct Repl2D(vecX dst, regD src) %{ 3811 predicate(n->as_Vector()->length() == 2); 3812 match(Set dst (ReplicateD src)); 3813 format %{ "pshufd $dst,$src,0x44\t! replicate2D" %} 3814 ins_encode %{ 3815 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3816 %} 3817 ins_pipe( pipe_slow ); 3818 %} 3819 3820 instruct Repl4D(vecY dst, regD src) %{ 3821 predicate(n->as_Vector()->length() == 4); 3822 match(Set dst (ReplicateD src)); 3823 format %{ "pshufd $dst,$src,0x44\n\t" 3824 "vinsertf128h $dst,$dst,$dst\t! 
replicate4D" %} 3825 ins_encode %{ 3826 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3827 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3828 %} 3829 ins_pipe( pipe_slow ); 3830 %} 3831 3832 instruct Repl8D(vecZ dst, regD src) %{ 3833 predicate(n->as_Vector()->length() == 8); 3834 match(Set dst (ReplicateD src)); 3835 format %{ "pshufd $dst,$src,0x44\n\t" 3836 "vinsertf128h $dst,$dst,$dst\t! lower replicate4D\n\t" 3837 "vinsertf64x4h $dst k0,$dst,$dst\t! upper replicate4D" %} 3838 ins_encode %{ 3839 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3840 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3841 __ vinsertf64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3842 %} 3843 ins_pipe( pipe_slow ); 3844 %} 3845 3846 // Replicate double (8 byte) scalar zero to be vector 3847 instruct Repl2D_zero(vecX dst, immD0 zero) %{ 3848 predicate(n->as_Vector()->length() == 2); 3849 match(Set dst (ReplicateD zero)); 3850 format %{ "xorpd $dst,$dst\t! replicate2D zero" %} 3851 ins_encode %{ 3852 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister); 3853 %} 3854 ins_pipe( fpu_reg_reg ); 3855 %} 3856 3857 instruct Repl4D_zero(vecY dst, immD0 zero) %{ 3858 predicate(n->as_Vector()->length() == 4); 3859 match(Set dst (ReplicateD zero)); 3860 format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %} 3861 ins_encode %{ 3862 int vector_len = 1; 3863 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3864 %} 3865 ins_pipe( fpu_reg_reg ); 3866 %} 3867 3868 instruct Repl8D_zero(vecZ dst, immD0 zero) %{ 3869 predicate(n->as_Vector()->length() == 8); 3870 match(Set dst (ReplicateD zero)); 3871 format %{ "vxorpd $dst k0,$dst,$dst,vect512\t! replicate8D zero" %} 3872 ins_encode %{ 3873 int vector_len = 2; 3874 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3875 %} 3876 ins_pipe( fpu_reg_reg ); 3877 %} 3878 3879 // ====================REDUCTION ARITHMETIC======================================= 3880 3881 instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 3882 predicate(UseSSE > 2 && UseAVX == 0); 3883 match(Set dst (AddReductionVI src1 src2)); 3884 effect(TEMP tmp2, TEMP tmp); 3885 format %{ "movdqu $tmp2,$src2\n\t" 3886 "phaddd $tmp2,$tmp2\n\t" 3887 "movd $tmp,$src1\n\t" 3888 "paddd $tmp,$tmp2\n\t" 3889 "movd $dst,$tmp\t! add reduction2I" %} 3890 ins_encode %{ 3891 __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister); 3892 __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); 3893 __ movdl($tmp$$XMMRegister, $src1$$Register); 3894 __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister); 3895 __ movdl($dst$$Register, $tmp$$XMMRegister); 3896 %} 3897 ins_pipe( pipe_slow ); 3898 %} 3899 3900 instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 3901 predicate(UseAVX > 0 && UseAVX < 3); 3902 match(Set dst (AddReductionVI src1 src2)); 3903 effect(TEMP tmp, TEMP tmp2); 3904 format %{ "vphaddd $tmp,$src2,$src2\n\t" 3905 "movd $tmp2,$src1\n\t" 3906 "vpaddd $tmp2,$tmp2,$tmp\n\t" 3907 "movd $dst,$tmp2\t! 
add reduction2I" %} 3908 ins_encode %{ 3909 int vector_len = 0; 3910 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 3911 __ movdl($tmp2$$XMMRegister, $src1$$Register); 3912 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 3913 __ movdl($dst$$Register, $tmp2$$XMMRegister); 3914 %} 3915 ins_pipe( pipe_slow ); 3916 %} 3917 3918 instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 3919 predicate(UseAVX > 2); 3920 match(Set dst (AddReductionVI src1 src2)); 3921 effect(TEMP tmp, TEMP tmp2); 3922 format %{ "pshufd $tmp2,$src2,0x1\n\t" 3923 "vpaddd $tmp,$src2,$tmp2\n\t" 3924 "movd $tmp2,$src1\n\t" 3925 "vpaddd $tmp2,$tmp,$tmp2\n\t" 3926 "movd $dst,$tmp2\t! add reduction2I" %} 3927 ins_encode %{ 3928 int vector_len = 0; 3929 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 3930 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 3931 __ movdl($tmp2$$XMMRegister, $src1$$Register); 3932 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 3933 __ movdl($dst$$Register, $tmp2$$XMMRegister); 3934 %} 3935 ins_pipe( pipe_slow ); 3936 %} 3937 3938 instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 3939 predicate(UseSSE > 2 && UseAVX == 0); 3940 match(Set dst (AddReductionVI src1 src2)); 3941 effect(TEMP tmp2, TEMP tmp); 3942 format %{ "movdqu $tmp2,$src2\n\t" 3943 "phaddd $tmp2,$tmp2\n\t" 3944 "phaddd $tmp2,$tmp2\n\t" 3945 "movd $tmp,$src1\n\t" 3946 "paddd $tmp,$tmp2\n\t" 3947 "movd $dst,$tmp\t! add reduction4I" %} 3948 ins_encode %{ 3949 __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister); 3950 __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); 3951 __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); 3952 __ movdl($tmp$$XMMRegister, $src1$$Register); 3953 __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister); 3954 __ movdl($dst$$Register, $tmp$$XMMRegister); 3955 %} 3956 ins_pipe( pipe_slow ); 3957 %} 3958 3959 instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 3960 predicate(UseAVX > 0 && UseAVX < 3); 3961 match(Set dst (AddReductionVI src1 src2)); 3962 effect(TEMP tmp, TEMP tmp2); 3963 format %{ "vphaddd $tmp,$src2,$src2\n\t" 3964 "vphaddd $tmp,$tmp,$tmp2\n\t" 3965 "movd $tmp2,$src1\n\t" 3966 "vpaddd $tmp2,$tmp2,$tmp\n\t" 3967 "movd $dst,$tmp2\t! add reduction4I" %} 3968 ins_encode %{ 3969 int vector_len = 0; 3970 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 3971 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 3972 __ movdl($tmp2$$XMMRegister, $src1$$Register); 3973 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 3974 __ movdl($dst$$Register, $tmp2$$XMMRegister); 3975 %} 3976 ins_pipe( pipe_slow ); 3977 %} 3978 3979 instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 3980 predicate(UseAVX > 2); 3981 match(Set dst (AddReductionVI src1 src2)); 3982 effect(TEMP tmp, TEMP tmp2); 3983 format %{ "pshufd $tmp2,$src2,0xE\n\t" 3984 "vpaddd $tmp,$src2,$tmp2\n\t" 3985 "pshufd $tmp2,$tmp,0x1\n\t" 3986 "vpaddd $tmp,$tmp,$tmp2\n\t" 3987 "movd $tmp2,$src1\n\t" 3988 "vpaddd $tmp2,$tmp,$tmp2\n\t" 3989 "movd $dst,$tmp2\t! 
add reduction4I" %} 3990 ins_encode %{ 3991 int vector_len = 0; 3992 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 3993 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 3994 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 3995 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 3996 __ movdl($tmp2$$XMMRegister, $src1$$Register); 3997 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 3998 __ movdl($dst$$Register, $tmp2$$XMMRegister); 3999 %} 4000 ins_pipe( pipe_slow ); 4001 %} 4002 4003 instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 4004 predicate(UseAVX > 0 && UseAVX < 3); 4005 match(Set dst (AddReductionVI src1 src2)); 4006 effect(TEMP tmp, TEMP tmp2); 4007 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4008 "vphaddd $tmp,$tmp,$tmp2\n\t" 4009 "vextracti128 $tmp2,$tmp\n\t" 4010 "vpaddd $tmp,$tmp,$tmp2\n\t" 4011 "movd $tmp2,$src1\n\t" 4012 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4013 "movd $dst,$tmp2\t! add reduction8I" %} 4014 ins_encode %{ 4015 int vector_len = 1; 4016 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4017 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4018 __ vextracti128h($tmp2$$XMMRegister, $tmp$$XMMRegister); 4019 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4020 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4021 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4022 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4023 %} 4024 ins_pipe( pipe_slow ); 4025 %} 4026 4027 instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 4028 predicate(UseAVX > 2); 4029 match(Set dst (AddReductionVI src1 src2)); 4030 effect(TEMP tmp, TEMP tmp2); 4031 format %{ "vextracti128 $tmp,$src2\n\t" 4032 "vpaddd $tmp,$tmp,$src2\n\t" 4033 "pshufd $tmp2,$tmp,0xE\n\t" 4034 "vpaddd $tmp,$tmp,$tmp2\n\t" 4035 "pshufd $tmp2,$tmp,0x1\n\t" 4036 "vpaddd $tmp,$tmp,$tmp2\n\t" 4037 "movd $tmp2,$src1\n\t" 4038 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4039 "movd $dst,$tmp2\t! add reduction8I" %} 4040 ins_encode %{ 4041 int vector_len = 0; 4042 __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister); 4043 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 4044 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 4045 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4046 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4047 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4048 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4049 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4050 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4051 %} 4052 ins_pipe( pipe_slow ); 4053 %} 4054 4055 instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 4056 predicate(UseAVX > 2); 4057 match(Set dst (AddReductionVI src1 src2)); 4058 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 4059 format %{ "vextracti64x4 $tmp3,$src2\n\t" 4060 "vpaddd $tmp3,$tmp3,$src2\n\t" 4061 "vextracti128 $tmp,$tmp3\n\t" 4062 "vpaddd $tmp,$tmp,$tmp3\n\t" 4063 "pshufd $tmp2,$tmp,0xE\n\t" 4064 "vpaddd $tmp,$tmp,$tmp2\n\t" 4065 "pshufd $tmp2,$tmp,0x1\n\t" 4066 "vpaddd $tmp,$tmp,$tmp2\n\t" 4067 "movd $tmp2,$src1\n\t" 4068 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4069 "movd $dst,$tmp2\t! 
mul reduction16I" %} 4070 ins_encode %{ 4071 __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister); 4072 __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 4073 __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister); 4074 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 4075 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 4076 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4077 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4078 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4079 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4080 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4081 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4082 %} 4083 ins_pipe( pipe_slow ); 4084 %} 4085 4086 #ifdef _LP64 4087 instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ 4088 predicate(UseAVX > 2); 4089 match(Set dst (AddReductionVL src1 src2)); 4090 effect(TEMP tmp, TEMP tmp2); 4091 format %{ "pshufd $tmp2,$src2,0xE\n\t" 4092 "vpaddq $tmp,$src2,$tmp2\n\t" 4093 "movdq $tmp2,$src1\n\t" 4094 "vpaddq $tmp2,$tmp,$tmp2\n\t" 4095 "movdq $dst,$tmp2\t! add reduction2L" %} 4096 ins_encode %{ 4097 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 4098 __ vpaddq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 4099 __ movdq($tmp2$$XMMRegister, $src1$$Register); 4100 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4101 __ movdq($dst$$Register, $tmp2$$XMMRegister); 4102 %} 4103 ins_pipe( pipe_slow ); 4104 %} 4105 4106 instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ 4107 predicate(UseAVX > 2); 4108 match(Set dst (AddReductionVL src1 src2)); 4109 effect(TEMP tmp, TEMP tmp2); 4110 format %{ "vextracti64x2 $tmp,$src2, 0x1\n\t" 4111 "vpaddq $tmp2,$tmp,$src2\n\t" 4112 "pshufd $tmp,$tmp2,0xE\n\t" 4113 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4114 "movdq $tmp,$src1\n\t" 4115 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4116 "movdq $dst,$tmp2\t! add reduction4L" %} 4117 ins_encode %{ 4118 __ vextracti64x2h($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); 4119 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 4120 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 4121 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4122 __ movdq($tmp$$XMMRegister, $src1$$Register); 4123 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4124 __ movdq($dst$$Register, $tmp2$$XMMRegister); 4125 %} 4126 ins_pipe( pipe_slow ); 4127 %} 4128 4129 instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ 4130 predicate(UseAVX > 2); 4131 match(Set dst (AddReductionVL src1 src2)); 4132 effect(TEMP tmp, TEMP tmp2); 4133 format %{ "vextracti64x4 $tmp2,$src2\n\t" 4134 "vpaddq $tmp2,$tmp2,$src2\n\t" 4135 "vextracti128 $tmp,$tmp2\n\t" 4136 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4137 "pshufd $tmp,$tmp2,0xE\n\t" 4138 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4139 "movdq $tmp,$src1\n\t" 4140 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4141 "movdq $dst,$tmp2\t! 
instruct rsadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (AddReductionVF src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "movdqu $tmp,$src1\n\t"
            "addss $tmp,$src2\n\t"
            "pshufd $tmp2,$src2,0x01\n\t"
            "addss $tmp,$tmp2\n\t"
            "movdqu $dst,$tmp\t! add reduction2F" %}
  ins_encode %{
    __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
    __ addss($tmp$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVF src1 src2));
  effect(TEMP tmp2, TEMP tmp);
  format %{ "vaddss $tmp2,$src1,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vaddss $dst,$tmp2,$tmp\t! add reduction2F" %}
  ins_encode %{
    __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsadd4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (AddReductionVF src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "movdqu $tmp,$src1\n\t"
            "addss $tmp,$src2\n\t"
            "pshufd $tmp2,$src2,0x01\n\t"
            "addss $tmp,$tmp2\n\t"
            "pshufd $tmp2,$src2,0x02\n\t"
            "addss $tmp,$tmp2\n\t"
            "pshufd $tmp2,$src2,0x03\n\t"
            "addss $tmp,$tmp2\n\t"
            "movdqu $dst,$tmp\t! add reduction4F" %}
  ins_encode %{
    __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
    __ addss($tmp$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
instruct rvadd4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVF src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vaddss $tmp2,$src1,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vaddss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vaddss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vaddss $dst,$tmp2,$tmp\t! add reduction4F" %}
  ins_encode %{
    __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct radd8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVF src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vaddss $tmp2,$src1,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vaddss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vaddss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vaddss $tmp2,$tmp2,$tmp\n\t"
            "vextractf128 $tmp3,$src2\n\t"
            "vaddss $tmp2,$tmp2,$tmp3\n\t"
            "pshufd $tmp,$tmp3,0x01\n\t"
            "vaddss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp3,0x02\n\t"
            "vaddss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp3,0x03\n\t"
            "vaddss $dst,$tmp2,$tmp\t! add reduction8F" %}
  ins_encode %{
    __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
instruct radd16F_reduction_reg(regF dst, regF src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVF src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vaddss $tmp2,$src1,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vaddss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vaddss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vaddss $tmp2,$tmp2,$tmp\n\t"
            "vextractf32x4 $tmp3,$src2, 0x1\n\t"
            "vaddss $tmp2,$tmp2,$tmp3\n\t"
            "pshufd $tmp,$tmp3,0x01\n\t"
            "vaddss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp3,0x02\n\t"
            "vaddss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp3,0x03\n\t"
            "vaddss $tmp2,$tmp2,$tmp\n\t"
            "vextractf32x4 $tmp3,$src2, 0x2\n\t"
            "vaddss $tmp2,$tmp2,$tmp3\n\t"
            "pshufd $tmp,$tmp3,0x01\n\t"
            "vaddss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp3,0x02\n\t"
            "vaddss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp3,0x03\n\t"
            "vaddss $tmp2,$tmp2,$tmp\n\t"
            "vextractf32x4 $tmp3,$src2, 0x3\n\t"
            "vaddss $tmp2,$tmp2,$tmp3\n\t"
            "pshufd $tmp,$tmp3,0x01\n\t"
            "vaddss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp3,0x02\n\t"
            "vaddss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp3,0x03\n\t"
            "vaddss $dst,$tmp2,$tmp\t! add reduction16F" %}
  ins_encode %{
    __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
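// Note: the float (and below, double) reductions deliberately use a strictly
// ordered chain of scalar addss/addsd operations rather than packed or
// horizontal adds: FP addition is not associative, so the reduction must add
// the lanes in the same order as the sequential Java loop it replaces. A
// loop of roughly this shape (illustrative Java, not part of this file) is
// what SuperWord turns into an AddReductionVF node, with the running sum
// becoming the scalar carry-in $src1:
//   float s = 0.0f;
//   for (int i = 0; i < a.length; i++) { s += a[i]; }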
instruct rsadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (AddReductionVD src1 src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "movdqu $tmp,$src1\n\t"
            "addsd $tmp,$src2\n\t"
            "pshufd $dst,$src2,0xE\n\t"
            "addsd $dst,$tmp\t! add reduction2D" %}
  ins_encode %{
    __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
    __ addsd($tmp$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ addsd($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVD src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vaddsd $tmp2,$src1,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vaddsd $dst,$tmp2,$tmp\t! add reduction2D" %}
  ins_encode %{
    __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
instruct rvadd4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVD src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vaddsd $tmp2,$src1,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vaddsd $tmp2,$tmp2,$tmp\n\t"
            "vextractf128 $tmp3,$src2\n\t"
            "vaddsd $tmp2,$tmp2,$tmp3\n\t"
            "pshufd $tmp,$tmp3,0xE\n\t"
            "vaddsd $dst,$tmp2,$tmp\t! add reduction4D" %}
  ins_encode %{
    __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd8D_reduction_reg(regD dst, regD src1, vecZ src2, regD tmp, regD tmp2, regD tmp3) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVD src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vaddsd $tmp2,$src1,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vaddsd $tmp2,$tmp2,$tmp\n\t"
            "vextractf64x2 $tmp3,$src2, 0x1\n\t"
            "vaddsd $tmp2,$tmp2,$tmp3\n\t"
            "pshufd $tmp,$tmp3,0xE\n\t"
            "vaddsd $tmp2,$tmp2,$tmp\n\t"
            "vextractf64x2 $tmp3,$src2, 0x2\n\t"
            "vaddsd $tmp2,$tmp2,$tmp3\n\t"
            "pshufd $tmp,$tmp3,0xE\n\t"
            "vaddsd $tmp2,$tmp2,$tmp\n\t"
            "vextractf64x2 $tmp3,$src2, 0x3\n\t"
            "vaddsd $tmp2,$tmp2,$tmp3\n\t"
            "pshufd $tmp,$tmp3,0xE\n\t"
            "vaddsd $dst,$tmp2,$tmp\t! add reduction8D" %}
  ins_encode %{
    __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseSSE > 3 && UseAVX == 0);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0x1\n\t"
            "pmulld $tmp2,$src2\n\t"
            "movd $tmp,$src1\n\t"
            "pmulld $tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! mul reduction2I" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0x1\n\t"
            "vpmulld $tmp,$src2,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpmulld $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! mul reduction2I" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseSSE > 3 && UseAVX == 0);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "pmulld $tmp2,$src2\n\t"
            "pshufd $tmp,$tmp2,0x1\n\t"
            "pmulld $tmp2,$tmp\n\t"
            "movd $tmp,$src1\n\t"
            "pmulld $tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! mul reduction4I" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1);
    __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "vpmulld $tmp,$src2,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpmulld $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpmulld $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! mul reduction4I" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti128 $tmp,$src2\n\t"
            "vpmulld $tmp,$tmp,$src2\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpmulld $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpmulld $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpmulld $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! mul reduction8I" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vextracti64x4 $tmp3,$src2\n\t"
            "vpmulld $tmp3,$tmp3,$src2\n\t"
            "vextracti128 $tmp,$tmp3\n\t"
            "vpmulld $tmp,$tmp,$tmp3\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpmulld $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpmulld $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpmulld $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! mul reduction16I" %}
  ins_encode %{
    __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister);
    __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
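// Note: pmulld (packed 32-bit multiply) is an SSE4.1 instruction, hence the
// UseSSE > 3 predicate on the SSE multiply reductions above. The long
// multiply reductions below additionally require AVX-512DQ for vpmullq,
// checked via VM_Version::supports_avx512dq().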
#ifdef _LP64
instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "vpmullq $tmp,$src2,$tmp2\n\t"
            "movdq $tmp2,$src1\n\t"
            "vpmullq $tmp2,$tmp,$tmp2\n\t"
            "movdq $dst,$tmp2\t! mul reduction2L" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpmullq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdq($tmp2$$XMMRegister, $src1$$Register);
    __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti64x2 $tmp,$src2, 0x1\n\t"
            "vpmullq $tmp2,$tmp,$src2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vpmullq $tmp2,$tmp2,$tmp\n\t"
            "movdq $tmp,$src1\n\t"
            "vpmullq $tmp2,$tmp2,$tmp\n\t"
            "movdq $dst,$tmp2\t! mul reduction4L" %}
  ins_encode %{
    __ vextracti64x2h($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti64x4 $tmp2,$src2\n\t"
            "vpmullq $tmp2,$tmp2,$src2\n\t"
            "vextracti128 $tmp,$tmp2\n\t"
            "vpmullq $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vpmullq $tmp2,$tmp2,$tmp\n\t"
            "movdq $tmp,$src1\n\t"
            "vpmullq $tmp2,$tmp2,$tmp\n\t"
            "movdq $dst,$tmp2\t! mul reduction8L" %}
  ins_encode %{
    __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif
instruct rsmul2F_reduction(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "movdqu $tmp,$src1\n\t"
            "mulss $tmp,$src2\n\t"
            "pshufd $tmp2,$src2,0x01\n\t"
            "mulss $tmp,$tmp2\n\t"
            "movdqu $dst,$tmp\t! mul reduction2F" %}
  ins_encode %{
    __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
    __ mulss($tmp$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vmulss $tmp2,$src1,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $dst,$tmp2,$tmp\t! mul reduction2F" %}
  ins_encode %{
    __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "movdqu $tmp,$src1\n\t"
            "mulss $tmp,$src2\n\t"
            "pshufd $tmp2,$src2,0x01\n\t"
            "mulss $tmp,$tmp2\n\t"
            "pshufd $tmp2,$src2,0x02\n\t"
            "mulss $tmp,$tmp2\n\t"
            "pshufd $tmp2,$src2,0x03\n\t"
            "mulss $tmp,$tmp2\n\t"
            "movdqu $dst,$tmp\t! mul reduction4F" %}
  ins_encode %{
    __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
    __ mulss($tmp$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
instruct rvmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vmulss $tmp2,$src1,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vmulss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vmulss $dst,$tmp2,$tmp\t! mul reduction4F" %}
  ins_encode %{
    __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vmulss $tmp2,$src1,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vmulss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vmulss $tmp2,$tmp2,$tmp\n\t"
            "vextractf128 $tmp3,$src2\n\t"
            "vmulss $tmp2,$tmp2,$tmp3\n\t"
            "pshufd $tmp,$tmp3,0x01\n\t"
            "vmulss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp3,0x02\n\t"
            "vmulss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp3,0x03\n\t"
            "vmulss $dst,$tmp2,$tmp\t! mul reduction8F" %}
  ins_encode %{
    __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
instruct rvmul16F_reduction_reg(regF dst, regF src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vmulss $tmp2,$src1,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vmulss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vmulss $tmp2,$tmp2,$tmp\n\t"
            "vextractf32x4 $tmp3,$src2, 0x1\n\t"
            "vmulss $tmp2,$tmp2,$tmp3\n\t"
            "pshufd $tmp,$tmp3,0x01\n\t"
            "vmulss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp3,0x02\n\t"
            "vmulss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp3,0x03\n\t"
            "vmulss $tmp2,$tmp2,$tmp\n\t"
            "vextractf32x4 $tmp3,$src2, 0x2\n\t"
            "vmulss $tmp2,$tmp2,$tmp3\n\t"
            "pshufd $tmp,$tmp3,0x01\n\t"
            "vmulss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp3,0x02\n\t"
            "vmulss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp3,0x03\n\t"
            "vmulss $tmp2,$tmp2,$tmp\n\t"
            "vextractf32x4 $tmp3,$src2, 0x3\n\t"
            "vmulss $tmp2,$tmp2,$tmp3\n\t"
            "pshufd $tmp,$tmp3,0x01\n\t"
            "vmulss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp3,0x02\n\t"
            "vmulss $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp3,0x03\n\t"
            "vmulss $dst,$tmp2,$tmp\t! mul reduction16F" %}
  ins_encode %{
    __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
instruct rsmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "movdqu $tmp,$src1\n\t"
            "mulsd $tmp,$src2\n\t"
            "pshufd $dst,$src2,0xE\n\t"
            "mulsd $dst,$tmp\t! mul reduction2D" %}
  ins_encode %{
    __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
    __ mulsd($tmp$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vmulsd $tmp2,$src1,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmulsd $dst,$tmp2,$tmp\t! mul reduction2D" %}
  ins_encode %{
    __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
instruct rvmul4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vmulsd $tmp2,$src1,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmulsd $tmp2,$tmp2,$tmp\n\t"
            "vextractf128 $tmp3,$src2\n\t"
            "vmulsd $tmp2,$tmp2,$tmp3\n\t"
            "pshufd $tmp,$tmp3,0xE\n\t"
            "vmulsd $dst,$tmp2,$tmp\t! mul reduction4D" %}
  ins_encode %{
    __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
    __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul8D_reduction_reg(regD dst, regD src1, vecZ src2, regD tmp, regD tmp2, regD tmp3) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vmulsd $tmp2,$src1,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmulsd $tmp2,$tmp2,$tmp\n\t"
            "vextractf64x2 $tmp3,$src2, 0x1\n\t"
            "vmulsd $tmp2,$tmp2,$tmp3\n\t"
            "pshufd $tmp,$tmp3,0xE\n\t"
            "vmulsd $tmp2,$tmp2,$tmp\n\t"
            "vextractf64x2 $tmp3,$src2, 0x2\n\t"
            "vmulsd $tmp2,$tmp2,$tmp3\n\t"
            "pshufd $tmp,$tmp3,0xE\n\t"
            "vmulsd $tmp2,$tmp2,$tmp\n\t"
            "vextractf64x2 $tmp3,$src2, 0x3\n\t"
            "vmulsd $tmp2,$tmp2,$tmp3\n\t"
            "pshufd $tmp,$tmp3,0xE\n\t"
            "vmulsd $dst,$tmp2,$tmp\t! mul reduction8D" %}
  ins_encode %{
    __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
    __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
    __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
// ====================VECTOR ARITHMETIC=======================================

// --------------------------------- ADD --------------------------------------

// Bytes vector add
instruct vadd4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed4B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed8B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed16B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
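// Note: the packed adds follow one pattern per element type: a two-operand
// SSE form that adds into dst, a three-operand AVX register form, and (for
// 128-bit and wider vectors) an AVX form with a memory operand. A simple
// counted loop such as the following (illustrative Java, not part of this
// file) is auto-vectorized by SuperWord into the AddVB/AddVS/AddVI/...
// nodes matched in this section:
//   byte[] a, b;
//   for (int i = 0; i < a.length; i++) { a[i] += b[i]; }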
// Shorts/Chars vector add
instruct vadd2S(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed2S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed4S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed8S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
// Integers vector add
instruct vadd2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed2I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed4I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
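// Note: the *_mem flavors match (AddV* src (LoadVector mem)) so the vector
// load is folded into the memory operand of the AVX instruction rather than
// needing a separate load instruction and register.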
// Longs vector add
instruct vadd2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL dst src));
  format %{ "paddq $dst,$src\t! add packed2L" %}
  ins_encode %{
    __ paddq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
// Floats vector add
instruct vadd2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed2F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed4F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
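// Note: unlike the FP reductions earlier in this file, packed FP adds can be
// vectorized directly with addps/vaddps: each lane is an independent Java
// add, so no reassociation of a sequential sum is involved.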
// Doubles vector add
instruct vadd2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD dst src));
  format %{ "addpd $dst,$src\t! add packed2D" %}
  ins_encode %{
    __ addpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
// --------------------------------- SUB --------------------------------------

// Bytes vector sub
instruct vsub4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed4B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed8B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed16B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector sub
instruct vsub2S(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed2S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed4S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed8S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector sub
instruct vsub2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed2I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed4I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector sub
instruct vsub2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL dst src));
  format %{ "psubq $dst,$src\t! sub packed2L" %}
  ins_encode %{
    __ psubq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector sub
instruct vsub2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed2F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed4F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector sub
instruct vsub2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVD dst src));
  format %{ "subpd $dst,$src\t! sub packed2D" %}
  ins_encode %{
    __ subpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- MUL --------------------------------------

// Shorts/Chars vector mul
instruct vmul2S(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed2S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed4S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed8S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector mul (sse4_1)
instruct vmul2I(vecD dst, vecD src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed2I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
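
// The packed 64-bit multiplies below are emitted with vpmullq, an
// AVX-512DQ instruction; hence the extra VM_Version::supports_avx512dq()
// check on top of UseAVX > 2.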
instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I(vecX dst, vecX src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed4I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector mul
instruct vmul2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed2F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed4F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector mul
instruct vmul2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD dst src));
  format %{ "mulpd $dst,$src\t! mul packed2D" %}
  ins_encode %{
    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- DIV --------------------------------------

// Floats vector div
instruct vdiv2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed2F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed4F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector div
instruct vdiv2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD dst src));
  format %{ "divpd $dst,$src\t! div packed2D" %}
  ins_encode %{
    __ divpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ Shift ---------------------------------------

// Left and right shift count vectors are the same on x86
// (only lowest bits of xmm reg are used for count).
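//
// A sketch of how this is used (assuming superword vectorizes the loop):
// for a Java loop like "for (int i = 0; i < n; i++) a[i] <<= s;" the scalar
// count 's' becomes a single shift-count node, matched once by vshiftcnt
// below; every packed shift in the loop body then reuses that xmm register,
// since the hardware reads the count only from its low bits.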

instruct vshiftcnt(vecS dst, rRegI cnt) %{
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "movd $dst,$cnt\t! load shift count" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $cnt$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ LeftShift -----------------------------------

// Shorts/Chars vector left shift
instruct vsll2S(vecS dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_imm(vecS dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector left shift
instruct vsll2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector left shift
instruct vsll2L(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8L_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ----------------------- LogicalRightShift -----------------------------------

// Shorts vector logical right shift produces an incorrect Java result for
// negative data, because Java code converts a short value into an int with
// sign extension before the shift. Char vectors are fine, though, since
// chars are unsigned values.
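// Worked example of the mismatch (illustrative, not matched code): for the
// short value -4 (0xFFFC), Java evaluates ((int)-4) >>> 1 = 0x7FFFFFFE and
// keeps the low 16 bits, 0xFFFE (-2), whereas a 16-bit psrlw lane would
// yield 0x7FFE (32766).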

instruct vsrl2S(vecS dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_imm(vecS dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}

instruct vsrl2S(vecS dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_imm(vecS dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector logical right shift
instruct vsrl2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector logical right shift
instruct vsrl2L(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8L_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------- ArithmeticRightShift -----------------------------------

// Shorts/Chars vector arithmetic right shift
instruct vsra2S(vecS dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_imm(vecS dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector arithmetic right shift
instruct vsra2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// There are no longs vector arithmetic right shift instructions.
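
// (Illustrative sketch only, hypothetical Java -- not what C2 emits.) Without
// a packed 64-bit arithmetic shift, each lane would have to be fixed up from
// the logical shift the hardware does provide, e.g. via the usual
// sign-extension identity for 0 < n < 64:
//
//   static long sra64(long x, int n) {
//     long logical = x >>> n;          // what a psrlq lane computes
//     long m = 1L << (63 - n);         // position of the shifted-down sign bit
//     return (logical ^ m) - m;        // sign-extend bit (63 - n) upward
//   }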

// --------------------------------- AND --------------------------------------

instruct vand4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (4 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (8 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (16 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- OR ---------------------------------------

instruct vor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (4 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (8 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (16 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- XOR --------------------------------------

instruct vxor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}