//
// Copyright (c) 2011, 2014, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def" name ( register save type, C convention save type,
//                  ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers. 512-bit registers, 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperWord flags).
// For pre-EVEX architectures:
//   XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX enabled architectures:
//   XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
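// As a reading aid, a minimal sketch of one such definition (FOO and foo are
// hypothetical names for illustration, not registers defined in this file):
//
//   reg_def FOO ( SOC, SOC, Op_RegF, 3, foo->as_VMReg());
//
// declares a register that is save-on-call both in compiled code and in the
// C calling convention, is spilled/reloaded as a Float (Op_RegF), and is
// emitted into opcodes as bit-pattern 3.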
//
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM31 preserved across function calls
//              XMM0-XMM3 might hold parameters

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

#ifdef _WIN64

reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(15));

reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(15));
reg_def XMM31 ( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(15));

#else // _WIN64

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

#ifdef _LP64

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
xmm23->as_VMReg()->next(8)); 924 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9)); 925 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10)); 926 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11)); 927 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12)); 928 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13)); 929 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14)); 930 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15)); 931 932 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()); 933 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1)); 934 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2)); 935 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3)); 936 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4)); 937 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5)); 938 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6)); 939 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7)); 940 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8)); 941 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9)); 942 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10)); 943 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11)); 944 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12)); 945 reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13)); 946 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14)); 947 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15)); 948 949 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()); 950 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1)); 951 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2)); 952 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3)); 953 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4)); 954 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5)); 955 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6)); 956 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7)); 957 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8)); 958 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9)); 959 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10)); 960 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11)); 961 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12)); 962 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13)); 963 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14)); 964 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15)); 965 966 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()); 967 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1)); 968 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2)); 969 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3)); 970 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4)); 971 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5)); 972 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6)); 973 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7)); 974 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8)); 975 reg_def XMM26j( SOC, SOC, Op_RegF, 26, 
xmm26->as_VMReg()->next(9)); 976 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10)); 977 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11)); 978 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12)); 979 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13)); 980 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14)); 981 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15)); 982 983 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()); 984 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1)); 985 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2)); 986 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3)); 987 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4)); 988 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5)); 989 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6)); 990 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7)); 991 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8)); 992 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9)); 993 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10)); 994 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11)); 995 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12)); 996 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13)); 997 reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14)); 998 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15)); 999 1000 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()); 1001 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1)); 1002 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2)); 1003 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3)); 1004 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4)); 1005 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5)); 1006 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6)); 1007 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7)); 1008 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8)); 1009 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9)); 1010 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10)); 1011 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11)); 1012 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12)); 1013 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13)); 1014 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14)); 1015 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15)); 1016 1017 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()); 1018 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1)); 1019 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2)); 1020 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3)); 1021 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4)); 1022 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5)); 1023 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6)); 1024 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7)); 1025 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8)); 1026 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9)); 1027 reg_def XMM29k( SOC, SOC, Op_RegF, 
29, xmm29->as_VMReg()->next(10)); 1028 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11)); 1029 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12)); 1030 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13)); 1031 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14)); 1032 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15)); 1033 1034 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()); 1035 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1)); 1036 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2)); 1037 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3)); 1038 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4)); 1039 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5)); 1040 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6)); 1041 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7)); 1042 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8)); 1043 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9)); 1044 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10)); 1045 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11)); 1046 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12)); 1047 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13)); 1048 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14)); 1049 reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15)); 1050 1051 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()); 1052 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1)); 1053 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2)); 1054 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3)); 1055 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4)); 1056 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5)); 1057 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6)); 1058 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7)); 1059 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8)); 1060 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9)); 1061 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10)); 1062 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11)); 1063 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12)); 1064 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13)); 1065 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14)); 1066 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15)); 1067 1068 #endif // _LP64 1069 1070 #endif // _WIN64 1071 1072 #ifdef _LP64 1073 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad()); 1074 #else 1075 reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad()); 1076 #endif // _LP64 1077 1078 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1079 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1080 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1081 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1082 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, 
XMM4o, XMM4p, 1083 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1084 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1085 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1086 #ifdef _LP64 1087 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1088 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1089 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1090 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1091 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1092 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1093 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1094 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1095 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 1096 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 1097 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 1098 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 1099 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 1100 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 1101 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 1102 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 1103 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 1104 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 1105 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 1106 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 1107 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 1108 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 1109 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1110 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1111 #endif 1112 ); 1113 1114 // flags 
allocation class should be last. 1115 alloc_class chunk3(RFLAGS); 1116 1117 // Singleton class for condition codes 1118 reg_class int_flags(RFLAGS); 1119 1120 // Class for pre evex float registers 1121 reg_class float_reg_legacy(XMM0, 1122 XMM1, 1123 XMM2, 1124 XMM3, 1125 XMM4, 1126 XMM5, 1127 XMM6, 1128 XMM7 1129 #ifdef _LP64 1130 ,XMM8, 1131 XMM9, 1132 XMM10, 1133 XMM11, 1134 XMM12, 1135 XMM13, 1136 XMM14, 1137 XMM15 1138 #endif 1139 ); 1140 1141 // Class for evex float registers 1142 reg_class float_reg_evex(XMM0, 1143 XMM1, 1144 XMM2, 1145 XMM3, 1146 XMM4, 1147 XMM5, 1148 XMM6, 1149 XMM7 1150 #ifdef _LP64 1151 ,XMM8, 1152 XMM9, 1153 XMM10, 1154 XMM11, 1155 XMM12, 1156 XMM13, 1157 XMM14, 1158 XMM15, 1159 XMM16, 1160 XMM17, 1161 XMM18, 1162 XMM19, 1163 XMM20, 1164 XMM21, 1165 XMM22, 1166 XMM23, 1167 XMM24, 1168 XMM25, 1169 XMM26, 1170 XMM27, 1171 XMM28, 1172 XMM29, 1173 XMM30, 1174 XMM31 1175 #endif 1176 ); 1177 1178 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} ); 1179 1180 // Class for pre evex double registers 1181 reg_class double_reg_legacy(XMM0, XMM0b, 1182 XMM1, XMM1b, 1183 XMM2, XMM2b, 1184 XMM3, XMM3b, 1185 XMM4, XMM4b, 1186 XMM5, XMM5b, 1187 XMM6, XMM6b, 1188 XMM7, XMM7b 1189 #ifdef _LP64 1190 ,XMM8, XMM8b, 1191 XMM9, XMM9b, 1192 XMM10, XMM10b, 1193 XMM11, XMM11b, 1194 XMM12, XMM12b, 1195 XMM13, XMM13b, 1196 XMM14, XMM14b, 1197 XMM15, XMM15b 1198 #endif 1199 ); 1200 1201 // Class for evex double registers 1202 reg_class double_reg_evex(XMM0, XMM0b, 1203 XMM1, XMM1b, 1204 XMM2, XMM2b, 1205 XMM3, XMM3b, 1206 XMM4, XMM4b, 1207 XMM5, XMM5b, 1208 XMM6, XMM6b, 1209 XMM7, XMM7b 1210 #ifdef _LP64 1211 ,XMM8, XMM8b, 1212 XMM9, XMM9b, 1213 XMM10, XMM10b, 1214 XMM11, XMM11b, 1215 XMM12, XMM12b, 1216 XMM13, XMM13b, 1217 XMM14, XMM14b, 1218 XMM15, XMM15b, 1219 XMM16, XMM16b, 1220 XMM17, XMM17b, 1221 XMM18, XMM18b, 1222 XMM19, XMM19b, 1223 XMM20, XMM20b, 1224 XMM21, XMM21b, 1225 XMM22, XMM22b, 1226 XMM23, XMM23b, 1227 XMM24, XMM24b, 1228 XMM25, XMM25b, 1229 XMM26, XMM26b, 1230 XMM27, XMM27b, 1231 XMM28, XMM28b, 1232 XMM29, XMM29b, 1233 XMM30, XMM30b, 1234 XMM31, XMM31b 1235 #endif 1236 ); 1237 1238 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} ); 1239 1240 // Class for pre evex 32bit vector registers 1241 reg_class vectors_reg_legacy(XMM0, 1242 XMM1, 1243 XMM2, 1244 XMM3, 1245 XMM4, 1246 XMM5, 1247 XMM6, 1248 XMM7 1249 #ifdef _LP64 1250 ,XMM8, 1251 XMM9, 1252 XMM10, 1253 XMM11, 1254 XMM12, 1255 XMM13, 1256 XMM14, 1257 XMM15 1258 #endif 1259 ); 1260 1261 // Class for evex 32bit vector registers 1262 reg_class vectors_reg_evex(XMM0, 1263 XMM1, 1264 XMM2, 1265 XMM3, 1266 XMM4, 1267 XMM5, 1268 XMM6, 1269 XMM7 1270 #ifdef _LP64 1271 ,XMM8, 1272 XMM9, 1273 XMM10, 1274 XMM11, 1275 XMM12, 1276 XMM13, 1277 XMM14, 1278 XMM15, 1279 XMM16, 1280 XMM17, 1281 XMM18, 1282 XMM19, 1283 XMM20, 1284 XMM21, 1285 XMM22, 1286 XMM23, 1287 XMM24, 1288 XMM25, 1289 XMM26, 1290 XMM27, 1291 XMM28, 1292 XMM29, 1293 XMM30, 1294 XMM31 1295 #endif 1296 ); 1297 1298 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} ); 1299 1300 // Class for all 64bit vector registers 1301 reg_class vectord_reg_legacy(XMM0, XMM0b, 1302 XMM1, XMM1b, 1303 XMM2, XMM2b, 1304 XMM3, XMM3b, 1305 XMM4, XMM4b, 1306 XMM5, XMM5b, 1307 XMM6, XMM6b, 1308 XMM7, XMM7b 1309 #ifdef _LP64 1310 ,XMM8, XMM8b, 1311 XMM9, XMM9b, 1312 XMM10, XMM10b, 1313 XMM11, XMM11b, 1314 XMM12, XMM12b, 1315 XMM13, XMM13b, 1316 
XMM14, XMM14b, 1317 XMM15, XMM15b 1318 #endif 1319 ); 1320 1321 // Class for all 64bit vector registers 1322 reg_class vectord_reg_evex(XMM0, XMM0b, 1323 XMM1, XMM1b, 1324 XMM2, XMM2b, 1325 XMM3, XMM3b, 1326 XMM4, XMM4b, 1327 XMM5, XMM5b, 1328 XMM6, XMM6b, 1329 XMM7, XMM7b 1330 #ifdef _LP64 1331 ,XMM8, XMM8b, 1332 XMM9, XMM9b, 1333 XMM10, XMM10b, 1334 XMM11, XMM11b, 1335 XMM12, XMM12b, 1336 XMM13, XMM13b, 1337 XMM14, XMM14b, 1338 XMM15, XMM15b, 1339 XMM16, XMM16b, 1340 XMM17, XMM17b, 1341 XMM18, XMM18b, 1342 XMM19, XMM19b, 1343 XMM20, XMM20b, 1344 XMM21, XMM21b, 1345 XMM22, XMM22b, 1346 XMM23, XMM23b, 1347 XMM24, XMM24b, 1348 XMM25, XMM25b, 1349 XMM26, XMM26b, 1350 XMM27, XMM27b, 1351 XMM28, XMM28b, 1352 XMM29, XMM29b, 1353 XMM30, XMM30b, 1354 XMM31, XMM31b 1355 #endif 1356 ); 1357 1358 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} ); 1359 1360 // Class for all 128bit vector registers 1361 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, 1362 XMM1, XMM1b, XMM1c, XMM1d, 1363 XMM2, XMM2b, XMM2c, XMM2d, 1364 XMM3, XMM3b, XMM3c, XMM3d, 1365 XMM4, XMM4b, XMM4c, XMM4d, 1366 XMM5, XMM5b, XMM5c, XMM5d, 1367 XMM6, XMM6b, XMM6c, XMM6d, 1368 XMM7, XMM7b, XMM7c, XMM7d 1369 #ifdef _LP64 1370 ,XMM8, XMM8b, XMM8c, XMM8d, 1371 XMM9, XMM9b, XMM9c, XMM9d, 1372 XMM10, XMM10b, XMM10c, XMM10d, 1373 XMM11, XMM11b, XMM11c, XMM11d, 1374 XMM12, XMM12b, XMM12c, XMM12d, 1375 XMM13, XMM13b, XMM13c, XMM13d, 1376 XMM14, XMM14b, XMM14c, XMM14d, 1377 XMM15, XMM15b, XMM15c, XMM15d 1378 #endif 1379 ); 1380 1381 // Class for all 128bit vector registers 1382 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, 1383 XMM1, XMM1b, XMM1c, XMM1d, 1384 XMM2, XMM2b, XMM2c, XMM2d, 1385 XMM3, XMM3b, XMM3c, XMM3d, 1386 XMM4, XMM4b, XMM4c, XMM4d, 1387 XMM5, XMM5b, XMM5c, XMM5d, 1388 XMM6, XMM6b, XMM6c, XMM6d, 1389 XMM7, XMM7b, XMM7c, XMM7d 1390 #ifdef _LP64 1391 ,XMM8, XMM8b, XMM8c, XMM8d, 1392 XMM9, XMM9b, XMM9c, XMM9d, 1393 XMM10, XMM10b, XMM10c, XMM10d, 1394 XMM11, XMM11b, XMM11c, XMM11d, 1395 XMM12, XMM12b, XMM12c, XMM12d, 1396 XMM13, XMM13b, XMM13c, XMM13d, 1397 XMM14, XMM14b, XMM14c, XMM14d, 1398 XMM15, XMM15b, XMM15c, XMM15d, 1399 XMM16, XMM16b, XMM16c, XMM16d, 1400 XMM17, XMM17b, XMM17c, XMM17d, 1401 XMM18, XMM18b, XMM18c, XMM18d, 1402 XMM19, XMM19b, XMM19c, XMM19d, 1403 XMM20, XMM20b, XMM20c, XMM20d, 1404 XMM21, XMM21b, XMM21c, XMM21d, 1405 XMM22, XMM22b, XMM22c, XMM22d, 1406 XMM23, XMM23b, XMM23c, XMM23d, 1407 XMM24, XMM24b, XMM24c, XMM24d, 1408 XMM25, XMM25b, XMM25c, XMM25d, 1409 XMM26, XMM26b, XMM26c, XMM26d, 1410 XMM27, XMM27b, XMM27c, XMM27d, 1411 XMM28, XMM28b, XMM28c, XMM28d, 1412 XMM29, XMM29b, XMM29c, XMM29d, 1413 XMM30, XMM30b, XMM30c, XMM30d, 1414 XMM31, XMM31b, XMM31c, XMM31d 1415 #endif 1416 ); 1417 1418 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} ); 1419 1420 // Class for all 256bit vector registers 1421 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1422 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1423 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1424 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 1425 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1426 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1427 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1428 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1429 #ifdef _LP64 1430 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1431 XMM9, XMM9b, 
XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1432 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1433 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1434 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1435 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1436 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1437 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h 1438 #endif 1439 ); 1440 1441 // Class for all 256bit vector registers 1442 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1443 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1444 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1445 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 1446 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1447 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1448 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1449 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1450 #ifdef _LP64 1451 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1452 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1453 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1454 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1455 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1456 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1457 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1458 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, 1459 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, 1460 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, 1461 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, 1462 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, 1463 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, 1464 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, 1465 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, 1466 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, 1467 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, 1468 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, 1469 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, 1470 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, 1471 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, 1472 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, 1473 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, 1474 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h 1475 #endif 1476 ); 1477 1478 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} ); 1479 1480 // Class for all 512bit vector registers 1481 reg_class vectorz_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1482 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1483 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1484 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1485 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1486 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, 
XMM5o, XMM5p, 1487 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1488 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1489 #ifdef _LP64 1490 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1491 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1492 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1493 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1494 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1495 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1496 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1497 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1498 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 1499 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 1500 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 1501 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 1502 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 1503 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 1504 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 1505 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 1506 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 1507 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 1508 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 1509 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 1510 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 1511 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 1512 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1513 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1514 #endif 1515 ); 1516 1517 %} 1518 1519 1520 //----------SOURCE BLOCK------------------------------------------------------- 1521 // This is a block of 
C++ code which provides values, functions, and 1522 // definitions necessary in the rest of the architecture description 1523 1524 source_hpp %{ 1525 // Header information of the source block. 1526 // Method declarations/definitions which are used outside 1527 // the ad-scope can conveniently be defined here. 1528 // 1529 // To keep related declarations/definitions/uses close together, 1530 // we switch between source %{ %} and source_hpp %{ %} freely as needed. 1531 1532 class NativeJump; 1533 1534 class CallStubImpl { 1535 1536 //-------------------------------------------------------------- 1537 //---< Used for optimization in Compile::shorten_branches >--- 1538 //-------------------------------------------------------------- 1539 1540 public: 1541 // Size of call trampoline stub. 1542 static uint size_call_trampoline() { 1543 return 0; // no call trampolines on this platform 1544 } 1545 1546 // Number of relocations needed by a call trampoline stub. 1547 static uint reloc_call_trampoline() { 1548 return 0; // no call trampolines on this platform 1549 } 1550 }; 1551 1552 class HandlerImpl { 1553 1554 public: 1555 1556 static int emit_exception_handler(CodeBuffer &cbuf); 1557 static int emit_deopt_handler(CodeBuffer& cbuf); 1558 1559 static uint size_exception_handler() { 1560 // NativeCall instruction size is the same as NativeJump. 1561 // The exception handler starts out as a jump and can be patched to 1562 // a call by deoptimization. (4932387) 1563 // Note that this value is also credited (in output.cpp) to 1564 // the size of the code section. 1565 return NativeJump::instruction_size; 1566 } 1567 1568 #ifdef _LP64 1569 static uint size_deopt_handler() { 1570 // three 5-byte instructions 1571 return 15; 1572 } 1573 #else 1574 static uint size_deopt_handler() { 1575 // NativeCall instruction size is the same as NativeJump. 1576 // The deopt handler starts out as a jump and can be patched to 1577 // a call by deoptimization. (4932387) 1578 // Note that this value is also credited (in output.cpp) to 1579 // the size of the code section. 1580 return 5 + NativeJump::instruction_size; // pushl(); jmp; 1581 } 1582 #endif 1583 }; 1584 1585 %} // end source_hpp 1586 1587 source %{ 1588 1589 // Emit exception handler code. 1590 // Stuff framesize into a register and call a VM stub routine. 1591 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { 1592 1593 // Note that the code buffer's insts_mark is always relative to insts. 1594 // That's why we must use the macroassembler to generate a handler. 1595 MacroAssembler _masm(&cbuf); 1596 address base = __ start_a_stub(size_exception_handler()); 1597 if (base == NULL) return 0; // CodeBuffer::expand failed 1598 int offset = __ offset(); 1599 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); 1600 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); 1601 __ end_a_stub(); 1602 return offset; 1603 } 1604 1605 // Emit deopt handler code. 1606 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { 1607 1608 // Note that the code buffer's insts_mark is always relative to insts. 1609 // That's why we must use the macroassembler to generate a handler.
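// The stub emitted below must leave the deoptimizing pc on the stack before
// jumping to the deopt blob: on _LP64 a call to the immediately following
// label pushes a return address, which is then adjusted in place to equal
// "the_pc"; on 32-bit the current pc is simply pushed. Control then
// transfers to SharedRuntime::deopt_blob()->unpack().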
1610 MacroAssembler _masm(&cbuf); 1611 address base = __ start_a_stub(size_deopt_handler()); 1612 if (base == NULL) return 0; // CodeBuffer::expand failed 1613 int offset = __ offset(); 1614 1615 #ifdef _LP64 1616 address the_pc = (address) __ pc(); 1617 Label next; 1618 // push "the_pc" on the stack without destroying any registers, 1619 // as they all may be live. 1620 1621 // push address of "next" 1622 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32 1623 __ bind(next); 1624 // adjust it so it matches "the_pc" 1625 __ subptr(Address(rsp, 0), __ offset() - offset); 1626 #else 1627 InternalAddress here(__ pc()); 1628 __ pushptr(here.addr()); 1629 #endif 1630 1631 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); 1632 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); 1633 __ end_a_stub(); 1634 return offset; 1635 } 1636 1637 1638 //============================================================================= 1639 1640 // Float masks come from different places depending on platform. 1641 #ifdef _LP64 1642 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 1643 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 1644 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 1645 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 1646 #else 1647 static address float_signmask() { return (address)float_signmask_pool; } 1648 static address float_signflip() { return (address)float_signflip_pool; } 1649 static address double_signmask() { return (address)double_signmask_pool; } 1650 static address double_signflip() { return (address)double_signflip_pool; } 1651 #endif 1652 1653 1654 const bool Matcher::match_rule_supported(int opcode) { 1655 if (!has_match_rule(opcode)) 1656 return false; 1657 1658 switch (opcode) { 1659 case Op_PopCountI: 1660 case Op_PopCountL: 1661 if (!UsePopCountInstruction) 1662 return false; 1663 break; 1664 case Op_MulVI: 1665 if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX 1666 return false; 1667 break; 1668 case Op_MulVL: 1669 case Op_MulReductionVL: 1670 if (VM_Version::supports_avx512dq() == false) 1671 return false; // else fall through to the checks below 1672 case Op_AddReductionVL: 1673 if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here 1674 return false; // else fall through 1675 case Op_AddReductionVI: 1676 if (UseSSE < 3) // requires at least SSE3 1677 return false; // else fall through 1678 case Op_MulReductionVI: 1679 if (UseSSE < 4) // requires at least SSE4 1680 return false; // else fall through 1681 case Op_AddReductionVF: 1682 case Op_AddReductionVD: 1683 case Op_MulReductionVF: 1684 case Op_MulReductionVD: 1685 if (UseSSE < 1) // requires at least SSE 1686 return false; 1687 break; 1688 case Op_CompareAndSwapL: 1689 #ifdef _LP64 1690 case Op_CompareAndSwapP: 1691 #endif 1692 if (!VM_Version::supports_cx8()) 1693 return false; 1694 break; 1695 } 1696 1697 return true; // By default, match rules are supported. 1698 } 1699 1700 // Max vector size in bytes. 0 if not supported. 1701 const int Matcher::vector_width_in_bytes(BasicType bt) { 1702 assert(is_java_primitive(bt), "only primitive type vectors"); 1703 if (UseSSE < 2) return 0; 1704 // SSE2 supports 128bit vectors for all types. 1705 // AVX2 supports 256bit vectors for all types. 1706 // EVEX (AVX-512) supports 512bit vectors for all types. 1707 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16; 1708 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
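// For example: UseAVX == 2 gives (1 << 2) * 8 = 32 bytes (256-bit) and
// UseAVX == 3 gives (1 << 3) * 8 = 64 bytes (512-bit), while UseAVX <= 1
// keeps the SSE2 width of 16 bytes; the branch below then raises FLOAT and
// DOUBLE vectors to 32 bytes already on AVX1.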
1709 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) 1710 size = (UseAVX > 2) ? 64 : 32; 1711 // Use flag to limit vector size. 1712 size = MIN2(size,(int)MaxVectorSize); 1713 // Minimum 2 values in vector (or 4 for bytes). 1714 switch (bt) { 1715 case T_DOUBLE: 1716 case T_LONG: 1717 if (size < 16) return 0; // else fall through to the smaller-size checks 1718 case T_FLOAT: 1719 case T_INT: 1720 if (size < 8) return 0; // else fall through 1721 case T_BOOLEAN: 1722 case T_BYTE: 1723 case T_CHAR: 1724 case T_SHORT: 1725 if (size < 4) return 0; 1726 break; 1727 default: 1728 ShouldNotReachHere(); 1729 } 1730 return size; 1731 } 1732 1733 // Limits on vector size (number of elements) loaded into vector. 1734 const int Matcher::max_vector_size(const BasicType bt) { 1735 return vector_width_in_bytes(bt)/type2aelembytes(bt); 1736 } 1737 const int Matcher::min_vector_size(const BasicType bt) { 1738 int max_size = max_vector_size(bt); 1739 // Min size which can be loaded into vector is 4 bytes. 1740 int size = (type2aelembytes(bt) == 1) ? 4 : 2; 1741 return MIN2(size,max_size); 1742 } 1743 1744 // Vector ideal reg corresponding to specified size in bytes. 1745 const int Matcher::vector_ideal_reg(int size) { 1746 assert(MaxVectorSize >= size, ""); 1747 switch(size) { 1748 case 4: return Op_VecS; 1749 case 8: return Op_VecD; 1750 case 16: return Op_VecX; 1751 case 32: return Op_VecY; 1752 case 64: return Op_VecZ; 1753 } 1754 ShouldNotReachHere(); 1755 return 0; 1756 } 1757 1758 // Only lowest bits of xmm reg are used for vector shift count. 1759 const int Matcher::vector_shift_count_ideal_reg(int size) { 1760 return Op_VecS; 1761 } 1762 1763 // x86 supports misaligned vector stores/loads. 1764 const bool Matcher::misaligned_vectors_ok() { 1765 return !AlignVector; // can be changed by flag 1766 } 1767 1768 // x86 AES instructions are compatible with SunJCE expanded 1769 // keys, hence we do not need to pass the original key to stubs. 1770 const bool Matcher::pass_original_key_for_aes() { 1771 return false; 1772 } 1773 1774 // Helper methods for MachSpillCopyNode::implementation(). 1775 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 1776 int src_hi, int dst_hi, uint ireg, outputStream* st) { 1777 // In the 64-bit VM, size calculation is very complex, so instructions are 1778 // emitted into a scratch buffer to determine their size. 1779 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 1780 assert(ireg == Op_VecS || // 32bit vector 1781 (src_lo & 1) == 0 && (src_lo + 1) == src_hi && 1782 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi, 1783 "no non-adjacent vector moves" ); 1784 if (cbuf) { 1785 MacroAssembler _masm(cbuf); 1786 int offset = __ offset(); 1787 switch (ireg) { 1788 case Op_VecS: // copy whole register 1789 case Op_VecD: 1790 case Op_VecX: 1791 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1792 break; 1793 case Op_VecY: 1794 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1795 break; 1796 case Op_VecZ: 1797 __ evmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2); 1798 break; 1799 default: 1800 ShouldNotReachHere(); 1801 } 1802 int size = __ offset() - offset; 1803 #ifdef ASSERT 1804 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
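// For a register-to-register move this gives the expected 4 bytes checked
// below: the 2-byte prefix plus a 1-byte opcode and a 1-byte ModRM.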
1805 assert(!do_size || size == 4, "incorrect size calculation"); 1806 #endif 1807 return size; 1808 #ifndef PRODUCT 1809 } else if (!do_size) { 1810 switch (ireg) { 1811 case Op_VecS: 1812 case Op_VecD: 1813 case Op_VecX: 1814 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 1815 break; 1816 case Op_VecY: 1817 case Op_VecZ: 1818 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 1819 break; 1820 default: 1821 ShouldNotReachHere(); 1822 } 1823 #endif 1824 } 1825 // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix. 1826 return (UseAVX > 2) ? 6 : 4; 1827 } 1828 1829 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, 1830 int stack_offset, int reg, uint ireg, outputStream* st) { 1831 // In the 64-bit VM, size calculation is very complex, so instructions are 1832 // emitted into a scratch buffer to determine their size. 1833 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 1834 if (cbuf) { 1835 MacroAssembler _masm(cbuf); 1836 int offset = __ offset(); 1837 if (is_load) { 1838 switch (ireg) { 1839 case Op_VecS: 1840 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1841 break; 1842 case Op_VecD: 1843 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1844 break; 1845 case Op_VecX: 1846 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1847 break; 1848 case Op_VecY: 1849 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1850 break; 1851 case Op_VecZ: 1852 __ evmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); 1853 break; 1854 default: 1855 ShouldNotReachHere(); 1856 } 1857 } else { // store 1858 switch (ireg) { 1859 case Op_VecS: 1860 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1861 break; 1862 case Op_VecD: 1863 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1864 break; 1865 case Op_VecX: 1866 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1867 break; 1868 case Op_VecY: 1869 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1870 break; 1871 case Op_VecZ: 1872 __ evmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); 1873 break; 1874 default: 1875 ShouldNotReachHere(); 1876 } 1877 } 1878 int size = __ offset() - offset; 1879 #ifdef ASSERT 1880 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4); 1881 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
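// An rsp-relative spill costs one byte more than a register-to-register
// move: 2-byte prefix + opcode + ModRM + SIB = 5 bytes, plus offset_size
// displacement bytes (the 6-byte case above appears to also cover the two
// extra bytes of the longer EVEX prefix when UseAVX > 2).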
1882 assert(!do_size || size == (5+offset_size), "incorrect size calculation"); 1883 #endif 1884 return size; 1885 #ifndef PRODUCT 1886 } else if (!do_size) { 1887 if (is_load) { 1888 switch (ireg) { 1889 case Op_VecS: 1890 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1891 break; 1892 case Op_VecD: 1893 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1894 break; 1895 case Op_VecX: 1896 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1897 break; 1898 case Op_VecY: 1899 case Op_VecZ: 1900 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1901 break; 1902 default: 1903 ShouldNotReachHere(); 1904 } 1905 } else { // store 1906 switch (ireg) { 1907 case Op_VecS: 1908 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1909 break; 1910 case Op_VecD: 1911 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1912 break; 1913 case Op_VecX: 1914 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1915 break; 1916 case Op_VecY: 1917 case Op_VecZ: 1918 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1919 break; 1920 default: 1921 ShouldNotReachHere(); 1922 } 1923 } 1924 #endif 1925 } 1926 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4); 1927 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 1928 return 5+offset_size; 1929 } 1930 1931 static inline jfloat replicate4_imm(int con, int width) { 1932 // Load a constant of "width" (in bytes) and replicate it to fill 32bit. 1933 assert(width == 1 || width == 2, "only byte or short types here"); 1934 int bit_width = width * 8; 1935 jint val = con; 1936 val &= (1 << bit_width) - 1; // mask off sign bits 1937 while(bit_width < 32) { 1938 val |= (val << bit_width); 1939 bit_width <<= 1; 1940 } 1941 jfloat fval = *((jfloat*) &val); // coerce to float type 1942 return fval; 1943 } 1944 1945 static inline jdouble replicate8_imm(int con, int width) { 1946 // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
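// For example, con = 0xAB with width == 1 masks to 0xAB and then doubles up:
// 0xAB -> 0xABAB -> 0xABABABAB -> 0xABABABABABABABAB; the resulting 64-bit
// pattern is then reinterpreted as a jdouble.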
1947 assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here"); 1948 int bit_width = width * 8; 1949 jlong val = con; 1950 val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits 1951 while(bit_width < 64) { 1952 val |= (val << bit_width); 1953 bit_width <<= 1; 1954 } 1955 jdouble dval = *((jdouble*) &val); // coerce to double type 1956 return dval; 1957 } 1958 1959 #ifndef PRODUCT 1960 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 1961 st->print("nop \t# %d bytes pad for loops and calls", _count); 1962 } 1963 #endif 1964 1965 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { 1966 MacroAssembler _masm(&cbuf); 1967 __ nop(_count); 1968 } 1969 1970 uint MachNopNode::size(PhaseRegAlloc*) const { 1971 return _count; 1972 } 1973 1974 #ifndef PRODUCT 1975 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 1976 st->print("# breakpoint"); 1977 } 1978 #endif 1979 1980 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { 1981 MacroAssembler _masm(&cbuf); 1982 __ int3(); 1983 } 1984 1985 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 1986 return MachNode::size(ra_); 1987 } 1988 1989 %} 1990 1991 encode %{ 1992 1993 enc_class preserve_SP %{ 1994 debug_only(int off0 = cbuf.insts_size()); 1995 MacroAssembler _masm(&cbuf); 1996 // RBP is preserved across all calls, even compiled calls. 1997 // Use it to preserve RSP in places where the callee might change the SP. 1998 __ movptr(rbp_mh_SP_save, rsp); 1999 debug_only(int off1 = cbuf.insts_size()); 2000 assert(off1 - off0 == preserve_SP_size(), "correct size prediction"); 2001 %} 2002 2003 enc_class restore_SP %{ 2004 MacroAssembler _masm(&cbuf); 2005 __ movptr(rsp, rbp_mh_SP_save); 2006 %} 2007 2008 enc_class call_epilog %{ 2009 if (VerifyStackAtCalls) { 2010 // Check that stack depth is unchanged: find majik cookie on stack 2011 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 2012 MacroAssembler _masm(&cbuf); 2013 Label L; 2014 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); 2015 __ jccb(Assembler::equal, L); 2016 // Die if stack mismatch 2017 __ int3(); 2018 __ bind(L); 2019 } 2020 %} 2021 2022 %} 2023 2024 2025 //----------OPERANDS----------------------------------------------------------- 2026 // Operand definitions must precede instruction definitions for correct parsing 2027 // in the ADLC because operands constitute user defined types which are used in 2028 // instruction definitions. 
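// Each operand ties an ideal register type to one of the register classes
// defined above via ALLOC_IN_RC; the narrower vector operands can pair a
// legacy and an EVEX class through reg_class_dynamic, whereas VecZ exists
// only in an EVEX flavor, as the next definition notes.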
2029 2030 // This one generically applies only for evex, so only one version 2031 operand vecZ() %{ 2032 constraint(ALLOC_IN_RC(vectorz_reg)); 2033 match(VecZ); 2034 2035 format %{ %} 2036 interface(REG_INTER); 2037 %} 2038 2039 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 2040 2041 // ============================================================================ 2042 2043 instruct ShouldNotReachHere() %{ 2044 match(Halt); 2045 format %{ "int3\t# ShouldNotReachHere" %} 2046 ins_encode %{ 2047 __ int3(); 2048 %} 2049 ins_pipe(pipe_slow); 2050 %} 2051 2052 // ============================================================================ 2053 2054 instruct addF_reg(regF dst, regF src) %{ 2055 predicate((UseSSE>=1) && (UseAVX == 0)); 2056 match(Set dst (AddF dst src)); 2057 2058 format %{ "addss $dst, $src" %} 2059 ins_cost(150); 2060 ins_encode %{ 2061 __ addss($dst$$XMMRegister, $src$$XMMRegister); 2062 %} 2063 ins_pipe(pipe_slow); 2064 %} 2065 2066 instruct addF_mem(regF dst, memory src) %{ 2067 predicate((UseSSE>=1) && (UseAVX == 0)); 2068 match(Set dst (AddF dst (LoadF src))); 2069 2070 format %{ "addss $dst, $src" %} 2071 ins_cost(150); 2072 ins_encode %{ 2073 __ addss($dst$$XMMRegister, $src$$Address); 2074 %} 2075 ins_pipe(pipe_slow); 2076 %} 2077 2078 instruct addF_imm(regF dst, immF con) %{ 2079 predicate((UseSSE>=1) && (UseAVX == 0)); 2080 match(Set dst (AddF dst con)); 2081 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2082 ins_cost(150); 2083 ins_encode %{ 2084 __ addss($dst$$XMMRegister, $constantaddress($con)); 2085 %} 2086 ins_pipe(pipe_slow); 2087 %} 2088 2089 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 2090 predicate(UseAVX > 0); 2091 match(Set dst (AddF src1 src2)); 2092 2093 format %{ "vaddss $dst, $src1, $src2" %} 2094 ins_cost(150); 2095 ins_encode %{ 2096 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2097 %} 2098 ins_pipe(pipe_slow); 2099 %} 2100 2101 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 2102 predicate(UseAVX > 0); 2103 match(Set dst (AddF src1 (LoadF src2))); 2104 2105 format %{ "vaddss $dst, $src1, $src2" %} 2106 ins_cost(150); 2107 ins_encode %{ 2108 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2109 %} 2110 ins_pipe(pipe_slow); 2111 %} 2112 2113 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 2114 predicate(UseAVX > 0); 2115 match(Set dst (AddF src con)); 2116 2117 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2118 ins_cost(150); 2119 ins_encode %{ 2120 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2121 %} 2122 ins_pipe(pipe_slow); 2123 %} 2124 2125 instruct addD_reg(regD dst, regD src) %{ 2126 predicate((UseSSE>=2) && (UseAVX == 0)); 2127 match(Set dst (AddD dst src)); 2128 2129 format %{ "addsd $dst, $src" %} 2130 ins_cost(150); 2131 ins_encode %{ 2132 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 2133 %} 2134 ins_pipe(pipe_slow); 2135 %} 2136 2137 instruct addD_mem(regD dst, memory src) %{ 2138 predicate((UseSSE>=2) && (UseAVX == 0)); 2139 match(Set dst (AddD dst (LoadD src))); 2140 2141 format %{ "addsd $dst, $src" %} 2142 ins_cost(150); 2143 ins_encode %{ 2144 __ addsd($dst$$XMMRegister, $src$$Address); 2145 %} 2146 ins_pipe(pipe_slow); 2147 %} 2148 2149 instruct addD_imm(regD dst, immD con) %{ 2150 predicate((UseSSE>=2) && (UseAVX == 0)); 2151 match(Set dst (AddD dst con)); 2152 format %{ "addsd $dst, [$constantaddress]\t# load from 
constant table: double=$con" %} 2153 ins_cost(150); 2154 ins_encode %{ 2155 __ addsd($dst$$XMMRegister, $constantaddress($con)); 2156 %} 2157 ins_pipe(pipe_slow); 2158 %} 2159 2160 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 2161 predicate(UseAVX > 0); 2162 match(Set dst (AddD src1 src2)); 2163 2164 format %{ "vaddsd $dst, $src1, $src2" %} 2165 ins_cost(150); 2166 ins_encode %{ 2167 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2168 %} 2169 ins_pipe(pipe_slow); 2170 %} 2171 2172 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 2173 predicate(UseAVX > 0); 2174 match(Set dst (AddD src1 (LoadD src2))); 2175 2176 format %{ "vaddsd $dst, $src1, $src2" %} 2177 ins_cost(150); 2178 ins_encode %{ 2179 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2180 %} 2181 ins_pipe(pipe_slow); 2182 %} 2183 2184 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 2185 predicate(UseAVX > 0); 2186 match(Set dst (AddD src con)); 2187 2188 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2189 ins_cost(150); 2190 ins_encode %{ 2191 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2192 %} 2193 ins_pipe(pipe_slow); 2194 %} 2195 2196 instruct subF_reg(regF dst, regF src) %{ 2197 predicate((UseSSE>=1) && (UseAVX == 0)); 2198 match(Set dst (SubF dst src)); 2199 2200 format %{ "subss $dst, $src" %} 2201 ins_cost(150); 2202 ins_encode %{ 2203 __ subss($dst$$XMMRegister, $src$$XMMRegister); 2204 %} 2205 ins_pipe(pipe_slow); 2206 %} 2207 2208 instruct subF_mem(regF dst, memory src) %{ 2209 predicate((UseSSE>=1) && (UseAVX == 0)); 2210 match(Set dst (SubF dst (LoadF src))); 2211 2212 format %{ "subss $dst, $src" %} 2213 ins_cost(150); 2214 ins_encode %{ 2215 __ subss($dst$$XMMRegister, $src$$Address); 2216 %} 2217 ins_pipe(pipe_slow); 2218 %} 2219 2220 instruct subF_imm(regF dst, immF con) %{ 2221 predicate((UseSSE>=1) && (UseAVX == 0)); 2222 match(Set dst (SubF dst con)); 2223 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2224 ins_cost(150); 2225 ins_encode %{ 2226 __ subss($dst$$XMMRegister, $constantaddress($con)); 2227 %} 2228 ins_pipe(pipe_slow); 2229 %} 2230 2231 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 2232 predicate(UseAVX > 0); 2233 match(Set dst (SubF src1 src2)); 2234 2235 format %{ "vsubss $dst, $src1, $src2" %} 2236 ins_cost(150); 2237 ins_encode %{ 2238 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2239 %} 2240 ins_pipe(pipe_slow); 2241 %} 2242 2243 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 2244 predicate(UseAVX > 0); 2245 match(Set dst (SubF src1 (LoadF src2))); 2246 2247 format %{ "vsubss $dst, $src1, $src2" %} 2248 ins_cost(150); 2249 ins_encode %{ 2250 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2251 %} 2252 ins_pipe(pipe_slow); 2253 %} 2254 2255 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 2256 predicate(UseAVX > 0); 2257 match(Set dst (SubF src con)); 2258 2259 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2260 ins_cost(150); 2261 ins_encode %{ 2262 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2263 %} 2264 ins_pipe(pipe_slow); 2265 %} 2266 2267 instruct subD_reg(regD dst, regD src) %{ 2268 predicate((UseSSE>=2) && (UseAVX == 0)); 2269 match(Set dst (SubD dst src)); 2270 2271 format %{ "subsd $dst, $src" %} 2272 ins_cost(150); 2273 ins_encode %{ 2274 __ 
subsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (SubD dst (LoadD src)));

  format %{ "subsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (SubD dst con));
  format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 src2));

  format %{ "vsubsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 (LoadD src2)));

  format %{ "vsubsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src con));

  format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst src));

  format %{ "mulss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst (LoadF src)));

  format %{ "mulss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst con));
  format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 src2));

  format %{ "vmulss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "vmulss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src con));

  format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst src));

  format %{ "mulsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst (LoadD src)));

  format %{ "mulsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst con));
  format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 src2));

  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 (LoadD src2)));

  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src con));

  format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst src));

  format %{ "divss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst (LoadF src)));

  format %{ "divss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst con));
  format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 src2));

  format %{ "vdivss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 (LoadF src2)));

  format %{ "vdivss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src con));

  format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst src));

  format %{ "divsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst (LoadD src)));

  format %{ "divsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst con));
  format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 src2));

  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 (LoadD src2)));

  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src con));

  format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct absF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AbsF dst));
  ins_cost(150);
  format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absF_reg_reg(regF dst, regF src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AbsD dst));
  ins_cost(150);
  format %{ "andpd $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg_reg(regD dst, regD src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

instruct negF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (NegF dst));
  ins_cost(150);
  format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negF_reg_reg(regF dst, regF src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegF src));
  ins_cost(150);
  format %{ "vxorps $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    int vector_len = 0;
    __ vxorps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signflip()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (NegD dst));
  ins_cost(150);
  format %{ "xorpd $dst, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg_reg(regD dst, regD src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegD src));
  ins_cost(150);
  format %{ "vxorpd $dst, $src, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    int vector_len = 0;
    __ vxorpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signflip()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_reg(regF dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_mem(regF dst, memory src) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_imm(regF dst, immF con) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D con))));
  format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
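
// Note: the sqrtF forms above match the tree ConvD2F(SqrtD(ConvF2D src)), i.e.
// (float)Math.sqrt((double)f), since the ideal graph has no single-precision
// sqrt node of its own. Collapsing the whole subtree to a single sqrtss is
// safe: for a float input, rounding the exact square root to double and then
// to float gives the same answer as rounding once to float, so the scalar
// single-precision instruction produces the required result directly.
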
instruct sqrtD_reg(regD dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD src));

  format %{ "sqrtsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_mem(regD dst, memory src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD (LoadD src)));

  format %{ "sqrtsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_imm(regD dst, immD con) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD con));
  format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// ====================VECTOR INSTRUCTIONS=====================================

// Load vectors (4 bytes long)
instruct loadV4(vecS dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movd $dst,$mem\t! load vector (4 bytes)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (8 bytes long)
instruct loadV8(vecD dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movq $dst,$mem\t! load vector (8 bytes)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (16 bytes long)
instruct loadV16(vecX dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %}
  ins_encode %{
    __ movdqu($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (32 bytes long)
instruct loadV32(vecY dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 32);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (64 bytes long)
instruct loadV64(vecZ dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 64);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdqu $dst k0,$mem\t! load vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdqu($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Store vectors
instruct storeV4(memory mem, vecS src) %{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movd $mem,$src\t! store vector (4 bytes)" %}
  ins_encode %{
    __ movdl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV8(memory mem, vecD src) %{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movq $mem,$src\t! store vector (8 bytes)" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV16(memory mem, vecX src) %{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %}
  ins_encode %{
    __ movdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV32(memory mem, vecY src) %{
  predicate(n->as_StoreVector()->memory_size() == 32);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV64(memory mem, vecZ src) %{
  predicate(n->as_StoreVector()->memory_size() == 64);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdqu $mem k0,$src\t! store vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdqu($mem$$Address, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate byte scalar to be vector
instruct Repl4B(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate8B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\t! replicate16B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate32B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B(vecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 64);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! lower replicate32B\n\t"
            "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate32B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate byte scalar immediate to be vector by loading from const table.
instruct Repl4B_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB con));
  format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1)));
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate16B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate32B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_imm(vecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 64);
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! lower replicate32B($con)\n\t"
            "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate32B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate byte scalar zero to be vector
instruct Repl4B_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate4B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8B_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate8B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16B_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate16B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl32B_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl64B_zero(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 64);
  match(Set dst (ReplicateB zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate64B zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate char/short (2 byte) scalar to be vector
instruct Repl2S(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate2S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8S(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\t! replicate8S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate16S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S(vecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! lower replicate16S\n\t"
            "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate16S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
instruct Repl2S_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS con));
  format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8S_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate8S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_imm(vecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! lower replicate16S($con)\n\t"
            "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate16S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate char/short (2 byte) scalar zero to be vector
instruct Repl2S_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate2S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate4S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8S_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate8S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16S_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl32S_zero(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateS zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate32S zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}
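
// A note on the Repl*_imm forms above and below: they build the broadcast
// pattern at compile time with the replicate4_imm/replicate8_imm helpers and
// load it from the constant table. Judging by their uses here, the helpers
// repeat the low `width` bytes of the immediate until a 32-bit or 64-bit word
// is filled; e.g. replicate4_imm(0x41, 1) would produce 0x41414141, and
// replicate8_imm(0x1234, 2) four copies of 0x1234 in one 64-bit constant.
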
// Replicate integer (4 byte) scalar to be vector
instruct Repl2I(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate4I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I(vecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\n\t"
            "vinserti128h $dst,$dst,$dst\t! lower replicate8I\n\t"
            "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate8I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
instruct Repl2I_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t"
            "punpcklqdq $dst,$dst" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_imm(vecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate16I($con)\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\n\t"
            "vinserti64x4h $dst k0,$dst,$dst" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// An integer can be loaded into an xmm register directly from memory.
instruct Repl2I_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\t! replicate4I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_mem(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\n\t"
            "vinserti128h $dst,$dst,$dst\t! lower replicate8I\n\t"
            "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate8I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate integer (4 byte) scalar zero to be vector
instruct Repl2I_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate2I" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate4I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8I_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16I_zero(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateI zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate16I zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate long (8 byte) scalar to be vector
#ifdef _LP64
instruct Repl2L(vecX dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4L(vecY dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L(vecZ dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! lower replicate4L\n\t"
            "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate4L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L(vecZ dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! lower replicate4L\n\t"
            "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate4L" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64

// Replicate long (8 byte) scalar immediate to be vector by loading from const table.
instruct Repl2L_imm(vecX dst, immL con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4L_imm(vecY dst, immL con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_imm(vecZ dst, immL con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! lower replicate4L($con)\n\t"
            "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate4L($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
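
// The _mem broadcast forms below fold the load into the replicate: the value
// is moved straight from memory into the XMM register with movq, avoiding the
// separate GPR load and movdq (or movdl pair on 32-bit) transfer that the
// register forms above require.
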
// A long can be loaded into an xmm register directly from memory.
instruct Repl2L_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "movq $dst,$mem\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4L_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "movq $dst,$mem\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_mem(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "movq $dst,$mem\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! lower replicate4L\n\t"
            "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate4L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate long (8 byte) scalar zero to be vector
instruct Repl2L_zero(vecX dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL zero));
  format %{ "pxor $dst,$dst\t! replicate2L zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4L_zero(vecY dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8L_zero(vecZ dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateL zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate8L zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate float (4 byte) scalar to be vector
instruct Repl2F(vecD dst, regF src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\t! replicate2F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F(vecX dst, regF src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\t! replicate4F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8F(vecY dst, regF src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16F(vecZ dst, regF src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\n\t"
            "vinsertf128h $dst,$dst,$dst\t! lower replicate8F\n\t"
            "vinsertf64x4h $dst k0,$dst,$dst\t! upper replicate8F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
    __ vinsertf64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate float (4 byte) scalar zero to be vector
instruct Repl2F_zero(vecD dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF zero));
  format %{ "xorps $dst,$dst\t! replicate2F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F_zero(vecX dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF zero));
  format %{ "xorps $dst,$dst\t! replicate4F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8F_zero(vecY dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateF zero));
  format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %}
  ins_encode %{
    int vector_len = 1;
    __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16F_zero(vecZ dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateF zero));
  format %{ "vxorps $dst k0,$dst,$dst\t! replicate16F zero" %}
  ins_encode %{
    int vector_len = 2;
    __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate double (8 bytes) scalar to be vector
instruct Repl2D(vecX dst, regD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\t! replicate2D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4D(vecY dst, regD src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8D(vecZ dst, regD src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\n\t"
            "vinsertf128h $dst,$dst,$dst\t! lower replicate4D\n\t"
            "vinsertf64x4h $dst k0,$dst,$dst\t! upper replicate4D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
    __ vinsertf64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate double (8 byte) scalar zero to be vector
instruct Repl2D_zero(vecX dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD zero));
  format %{ "xorpd $dst,$dst\t! replicate2D zero" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4D_zero(vecY dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateD zero));
  format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %}
  ins_encode %{
    int vector_len = 1;
    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8D_zero(vecZ dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateD zero));
  format %{ "vxorpd $dst k0,$dst,$dst,vect512\t! replicate8D zero" %}
  ins_encode %{
    int vector_len = 2;
    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// ====================REDUCTION ARITHMETIC=======================================

instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseSSE > 2 && UseAVX == 0);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp2, TEMP tmp);
  format %{ "movdqu $tmp2,$src2\n\t"
            "phaddd $tmp2,$tmp2\n\t"
            "movd $tmp,$src1\n\t"
            "paddd $tmp,$tmp2\n\t"
            "movd $dst,$tmp\t! add reduction2I" %}
  ins_encode %{
    __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
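
// The AVX/EVEX reductions that follow share one ladder: repeatedly halve the
// vector (vextracti*/pshufd brings the high lanes down) and add the halves
// lane-wise until a single lane remains, then fold in the scalar input src1
// and move the result back to a general-purpose register.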
add reduction2I" %} 3923 ins_encode %{ 3924 int vector_len = 0; 3925 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 3926 __ movdl($tmp2$$XMMRegister, $src1$$Register); 3927 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 3928 __ movdl($dst$$Register, $tmp2$$XMMRegister); 3929 %} 3930 ins_pipe( pipe_slow ); 3931 %} 3932 3933 instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 3934 predicate(UseAVX > 2); 3935 match(Set dst (AddReductionVI src1 src2)); 3936 effect(TEMP tmp, TEMP tmp2); 3937 format %{ "pshufd $tmp2,$src2,0x1\n\t" 3938 "vpaddd $tmp,$src2,$tmp2\n\t" 3939 "movd $tmp2,$src1\n\t" 3940 "vpaddd $tmp2,$tmp,$tmp2\n\t" 3941 "movd $dst,$tmp2\t! add reduction2I" %} 3942 ins_encode %{ 3943 int vector_len = 0; 3944 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 3945 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 3946 __ movdl($tmp2$$XMMRegister, $src1$$Register); 3947 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 3948 __ movdl($dst$$Register, $tmp2$$XMMRegister); 3949 %} 3950 ins_pipe( pipe_slow ); 3951 %} 3952 3953 instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 3954 predicate(UseSSE > 2 && UseAVX == 0); 3955 match(Set dst (AddReductionVI src1 src2)); 3956 effect(TEMP tmp2, TEMP tmp); 3957 format %{ "movdqu $tmp2,$src2\n\t" 3958 "phaddd $tmp2,$tmp2\n\t" 3959 "phaddd $tmp2,$tmp2\n\t" 3960 "movd $tmp,$src1\n\t" 3961 "paddd $tmp,$tmp2\n\t" 3962 "movd $dst,$tmp\t! add reduction4I" %} 3963 ins_encode %{ 3964 __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister); 3965 __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); 3966 __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); 3967 __ movdl($tmp$$XMMRegister, $src1$$Register); 3968 __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister); 3969 __ movdl($dst$$Register, $tmp$$XMMRegister); 3970 %} 3971 ins_pipe( pipe_slow ); 3972 %} 3973 3974 instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 3975 predicate(UseAVX > 0 && UseAVX < 3); 3976 match(Set dst (AddReductionVI src1 src2)); 3977 effect(TEMP tmp, TEMP tmp2); 3978 format %{ "vphaddd $tmp,$src2,$src2\n\t" 3979 "vphaddd $tmp,$tmp,$tmp2\n\t" 3980 "movd $tmp2,$src1\n\t" 3981 "vpaddd $tmp2,$tmp2,$tmp\n\t" 3982 "movd $dst,$tmp2\t! add reduction4I" %} 3983 ins_encode %{ 3984 int vector_len = 0; 3985 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 3986 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 3987 __ movdl($tmp2$$XMMRegister, $src1$$Register); 3988 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 3989 __ movdl($dst$$Register, $tmp2$$XMMRegister); 3990 %} 3991 ins_pipe( pipe_slow ); 3992 %} 3993 3994 instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 3995 predicate(UseAVX > 2); 3996 match(Set dst (AddReductionVI src1 src2)); 3997 effect(TEMP tmp, TEMP tmp2); 3998 format %{ "pshufd $tmp2,$src2,0xE\n\t" 3999 "vpaddd $tmp,$src2,$tmp2\n\t" 4000 "pshufd $tmp2,$tmp,0x1\n\t" 4001 "vpaddd $tmp,$tmp,$tmp2\n\t" 4002 "movd $tmp2,$src1\n\t" 4003 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4004 "movd $dst,$tmp2\t! 
add reduction4I" %} 4005 ins_encode %{ 4006 int vector_len = 0; 4007 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 4008 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4009 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4010 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4011 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4012 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4013 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4014 %} 4015 ins_pipe( pipe_slow ); 4016 %} 4017 4018 instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 4019 predicate(UseAVX > 0 && UseAVX < 3); 4020 match(Set dst (AddReductionVI src1 src2)); 4021 effect(TEMP tmp, TEMP tmp2); 4022 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4023 "vphaddd $tmp,$tmp,$tmp2\n\t" 4024 "vextracti128 $tmp2,$tmp\n\t" 4025 "vpaddd $tmp,$tmp,$tmp2\n\t" 4026 "movd $tmp2,$src1\n\t" 4027 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4028 "movd $dst,$tmp2\t! add reduction8I" %} 4029 ins_encode %{ 4030 int vector_len = 1; 4031 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4032 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4033 __ vextracti128h($tmp2$$XMMRegister, $tmp$$XMMRegister); 4034 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4035 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4036 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4037 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4038 %} 4039 ins_pipe( pipe_slow ); 4040 %} 4041 4042 instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 4043 predicate(UseAVX > 2); 4044 match(Set dst (AddReductionVI src1 src2)); 4045 effect(TEMP tmp, TEMP tmp2); 4046 format %{ "vextracti128 $tmp,$src2\n\t" 4047 "vpaddd $tmp,$tmp,$src2\n\t" 4048 "pshufd $tmp2,$tmp,0xE\n\t" 4049 "vpaddd $tmp,$tmp,$tmp2\n\t" 4050 "pshufd $tmp2,$tmp,0x1\n\t" 4051 "vpaddd $tmp,$tmp,$tmp2\n\t" 4052 "movd $tmp2,$src1\n\t" 4053 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4054 "movd $dst,$tmp2\t! add reduction8I" %} 4055 ins_encode %{ 4056 int vector_len = 0; 4057 __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister); 4058 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 4059 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 4060 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4061 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4062 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4063 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4064 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4065 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4066 %} 4067 ins_pipe( pipe_slow ); 4068 %} 4069 4070 instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 4071 predicate(UseAVX > 2); 4072 match(Set dst (AddReductionVI src1 src2)); 4073 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 4074 format %{ "vextracti64x4 $tmp3,$src2\n\t" 4075 "vpaddd $tmp3,$tmp3,$src2\n\t" 4076 "vextracti128 $tmp,$tmp3\n\t" 4077 "vpaddd $tmp,$tmp,$tmp3\n\t" 4078 "pshufd $tmp2,$tmp,0xE\n\t" 4079 "vpaddd $tmp,$tmp,$tmp2\n\t" 4080 "pshufd $tmp2,$tmp,0x1\n\t" 4081 "vpaddd $tmp,$tmp,$tmp2\n\t" 4082 "movd $tmp2,$src1\n\t" 4083 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4084 "movd $dst,$tmp2\t! 
mul reduction16I" %} 4085 ins_encode %{ 4086 __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister); 4087 __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 4088 __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister); 4089 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 4090 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 4091 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4092 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4093 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4094 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4095 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4096 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4097 %} 4098 ins_pipe( pipe_slow ); 4099 %} 4100 4101 #ifdef _LP64 4102 instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ 4103 predicate(UseAVX > 2); 4104 match(Set dst (AddReductionVL src1 src2)); 4105 effect(TEMP tmp, TEMP tmp2); 4106 format %{ "pshufd $tmp2,$src2,0xE\n\t" 4107 "vpaddq $tmp,$src2,$tmp2\n\t" 4108 "movdq $tmp2,$src1\n\t" 4109 "vpaddq $tmp2,$tmp,$tmp2\n\t" 4110 "movdq $dst,$tmp2\t! add reduction2L" %} 4111 ins_encode %{ 4112 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 4113 __ vpaddq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 4114 __ movdq($tmp2$$XMMRegister, $src1$$Register); 4115 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4116 __ movdq($dst$$Register, $tmp2$$XMMRegister); 4117 %} 4118 ins_pipe( pipe_slow ); 4119 %} 4120 4121 instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ 4122 predicate(UseAVX > 2); 4123 match(Set dst (AddReductionVL src1 src2)); 4124 effect(TEMP tmp, TEMP tmp2); 4125 format %{ "vextracti64x2 $tmp,$src2, 0x1\n\t" 4126 "vpaddq $tmp2,$tmp,$src2\n\t" 4127 "pshufd $tmp,$tmp2,0xE\n\t" 4128 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4129 "movdq $tmp,$src1\n\t" 4130 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4131 "movdq $dst,$tmp2\t! add reduction4L" %} 4132 ins_encode %{ 4133 __ vextracti64x2h($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); 4134 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 4135 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 4136 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4137 __ movdq($tmp$$XMMRegister, $src1$$Register); 4138 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4139 __ movdq($dst$$Register, $tmp2$$XMMRegister); 4140 %} 4141 ins_pipe( pipe_slow ); 4142 %} 4143 4144 instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ 4145 predicate(UseAVX > 2); 4146 match(Set dst (AddReductionVL src1 src2)); 4147 effect(TEMP tmp, TEMP tmp2); 4148 format %{ "vextracti64x4 $tmp2,$src2\n\t" 4149 "vpaddq $tmp2,$tmp2,$src2\n\t" 4150 "vextracti128 $tmp,$tmp2\n\t" 4151 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4152 "pshufd $tmp,$tmp2,0xE\n\t" 4153 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4154 "movdq $tmp,$src1\n\t" 4155 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4156 "movdq $dst,$tmp2\t! 

instruct rsadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (AddReductionVF src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "movdqu  $tmp,$src1\n\t"
            "addss   $tmp,$src2\n\t"
            "pshufd  $tmp2,$src2,0x01\n\t"
            "addss   $tmp,$tmp2\n\t"
            "movdqu  $dst,$tmp\t! add reduction2F" %}
  ins_encode %{
    __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
    __ addss($tmp$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVF src1 src2));
  effect(TEMP tmp2, TEMP tmp);
  format %{ "vaddss  $tmp2,$src1,$src2\n\t"
            "pshufd  $tmp,$src2,0x01\n\t"
            "vaddss  $dst,$tmp2,$tmp\t! add reduction2F" %}
  ins_encode %{
    __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsadd4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (AddReductionVF src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "movdqu  $tmp,$src1\n\t"
            "addss   $tmp,$src2\n\t"
            "pshufd  $tmp2,$src2,0x01\n\t"
            "addss   $tmp,$tmp2\n\t"
            "pshufd  $tmp2,$src2,0x02\n\t"
            "addss   $tmp,$tmp2\n\t"
            "pshufd  $tmp2,$src2,0x03\n\t"
            "addss   $tmp,$tmp2\n\t"
            "movdqu  $dst,$tmp\t! add reduction4F" %}
  ins_encode %{
    __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
    __ addss($tmp$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVF src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vaddss  $tmp2,$src1,$src2\n\t"
            "pshufd  $tmp,$src2,0x01\n\t"
            "vaddss  $tmp2,$tmp2,$tmp\n\t"
            "pshufd  $tmp,$src2,0x02\n\t"
            "vaddss  $tmp2,$tmp2,$tmp\n\t"
            "pshufd  $tmp,$src2,0x03\n\t"
            "vaddss  $dst,$tmp2,$tmp\t! add reduction4F" %}
  ins_encode %{
    __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct radd8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVF src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vaddss  $tmp2,$src1,$src2\n\t"
            "pshufd  $tmp,$src2,0x01\n\t"
            "vaddss  $tmp2,$tmp2,$tmp\n\t"
            "pshufd  $tmp,$src2,0x02\n\t"
            "vaddss  $tmp2,$tmp2,$tmp\n\t"
            "pshufd  $tmp,$src2,0x03\n\t"
            "vaddss  $tmp2,$tmp2,$tmp\n\t"
            "vextractf128  $tmp3,$src2\n\t"
            "vaddss  $tmp2,$tmp2,$tmp3\n\t"
            "pshufd  $tmp,$tmp3,0x01\n\t"
            "vaddss  $tmp2,$tmp2,$tmp\n\t"
            "pshufd  $tmp,$tmp3,0x02\n\t"
            "vaddss  $tmp2,$tmp2,$tmp\n\t"
            "pshufd  $tmp,$tmp3,0x03\n\t"
            "vaddss  $dst,$tmp2,$tmp\t! add reduction8F" %}
  ins_encode %{
    __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct radd16F_reduction_reg(regF dst, regF src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVF src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vaddss  $tmp2,$src1,$src2\n\t"
            "pshufd  $tmp,$src2,0x01\n\t"
            "vaddss  $tmp2,$tmp2,$tmp\n\t"
            "pshufd  $tmp,$src2,0x02\n\t"
            "vaddss  $tmp2,$tmp2,$tmp\n\t"
            "pshufd  $tmp,$src2,0x03\n\t"
            "vaddss  $tmp2,$tmp2,$tmp\n\t"
            "vextractf32x4  $tmp3,$src2, 0x1\n\t"
            "vaddss  $tmp2,$tmp2,$tmp3\n\t"
            "pshufd  $tmp,$tmp3,0x01\n\t"
            "vaddss  $tmp2,$tmp2,$tmp\n\t"
            "pshufd  $tmp,$tmp3,0x02\n\t"
            "vaddss  $tmp2,$tmp2,$tmp\n\t"
            "pshufd  $tmp,$tmp3,0x03\n\t"
            "vaddss  $tmp2,$tmp2,$tmp\n\t"
            "vextractf32x4  $tmp3,$src2, 0x2\n\t"
            "vaddss  $tmp2,$tmp2,$tmp3\n\t"
            "pshufd  $tmp,$tmp3,0x01\n\t"
            "vaddss  $tmp2,$tmp2,$tmp\n\t"
            "pshufd  $tmp,$tmp3,0x02\n\t"
            "vaddss  $tmp2,$tmp2,$tmp\n\t"
            "pshufd  $tmp,$tmp3,0x03\n\t"
            "vaddss  $tmp2,$tmp2,$tmp\n\t"
            "vextractf32x4  $tmp3,$src2, 0x3\n\t"
            "vaddss  $tmp2,$tmp2,$tmp3\n\t"
            "pshufd  $tmp,$tmp3,0x01\n\t"
            "vaddss  $tmp2,$tmp2,$tmp\n\t"
            "pshufd  $tmp,$tmp3,0x02\n\t"
            "vaddss  $tmp2,$tmp2,$tmp\n\t"
            "pshufd  $tmp,$tmp3,0x03\n\t"
            "vaddss  $dst,$tmp2,$tmp\t! add reduction16F" %}
  ins_encode %{
    __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
    __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (AddReductionVD src1 src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "movdqu  $tmp,$src1\n\t"
            "addsd   $tmp,$src2\n\t"
            "pshufd  $dst,$src2,0xE\n\t"
            "addsd   $dst,$tmp\t! add reduction2D" %}
  ins_encode %{
    __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
    __ addsd($tmp$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ addsd($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVD src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vaddsd  $tmp2,$src1,$src2\n\t"
            "pshufd  $tmp,$src2,0xE\n\t"
            "vaddsd  $dst,$tmp2,$tmp\t! add reduction2D" %}
  ins_encode %{
    __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVD src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vaddsd  $tmp2,$src1,$src2\n\t"
            "pshufd  $tmp,$src2,0xE\n\t"
            "vaddsd  $tmp2,$tmp2,$tmp\n\t"
            "vextractf128  $tmp3,$src2\n\t"
            "vaddsd  $tmp2,$tmp2,$tmp3\n\t"
            "pshufd  $tmp,$tmp3,0xE\n\t"
            "vaddsd  $dst,$tmp2,$tmp\t! add reduction4D" %}
  ins_encode %{
    __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd8D_reduction_reg(regD dst, regD src1, vecZ src2, regD tmp, regD tmp2, regD tmp3) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVD src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vaddsd  $tmp2,$src1,$src2\n\t"
            "pshufd  $tmp,$src2,0xE\n\t"
            "vaddsd  $tmp2,$tmp2,$tmp\n\t"
            "vextractf64x2  $tmp3,$src2, 0x1\n\t"
            "vaddsd  $tmp2,$tmp2,$tmp3\n\t"
            "pshufd  $tmp,$tmp3,0xE\n\t"
            "vaddsd  $tmp2,$tmp2,$tmp\n\t"
            "vextractf64x2  $tmp3,$src2, 0x2\n\t"
            "vaddsd  $tmp2,$tmp2,$tmp3\n\t"
            "pshufd  $tmp,$tmp3,0xE\n\t"
            "vaddsd  $tmp2,$tmp2,$tmp\n\t"
            "vextractf64x2  $tmp3,$src2, 0x3\n\t"
            "vaddsd  $tmp2,$tmp2,$tmp3\n\t"
            "pshufd  $tmp,$tmp3,0xE\n\t"
            "vaddsd  $dst,$tmp2,$tmp\t! add reduction8D" %}
  ins_encode %{
    __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
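
// Multiply reductions follow the same lane-folding scheme as the add
// reductions above.  The scalar SSE rules require SSE4.1 (UseSSE > 3),
// where the packed 32-bit multiply pmulld was introduced.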

instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseSSE > 3 && UseAVX == 0);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd  $tmp2,$src2,0x1\n\t"
            "pmulld  $tmp2,$src2\n\t"
            "movd    $tmp,$src1\n\t"
            "pmulld  $tmp2,$tmp\n\t"
            "movd    $dst,$tmp2\t! mul reduction2I" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd   $tmp2,$src2,0x1\n\t"
            "vpmulld  $tmp,$src2,$tmp2\n\t"
            "movd     $tmp2,$src1\n\t"
            "vpmulld  $tmp2,$tmp,$tmp2\n\t"
            "movd     $dst,$tmp2\t! mul reduction2I" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseSSE > 3 && UseAVX == 0);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd  $tmp2,$src2,0xE\n\t"
            "pmulld  $tmp2,$src2\n\t"
            "pshufd  $tmp,$tmp2,0x1\n\t"
            "pmulld  $tmp2,$tmp\n\t"
            "movd    $tmp,$src1\n\t"
            "pmulld  $tmp2,$tmp\n\t"
            "movd    $dst,$tmp2\t! mul reduction4I" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1);
    __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd   $tmp2,$src2,0xE\n\t"
            "vpmulld  $tmp,$src2,$tmp2\n\t"
            "pshufd   $tmp2,$tmp,0x1\n\t"
            "vpmulld  $tmp,$tmp,$tmp2\n\t"
            "movd     $tmp2,$src1\n\t"
            "vpmulld  $tmp2,$tmp,$tmp2\n\t"
            "movd     $dst,$tmp2\t! mul reduction4I" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti128  $tmp,$src2\n\t"
            "vpmulld  $tmp,$tmp,$src2\n\t"
            "pshufd   $tmp2,$tmp,0xE\n\t"
            "vpmulld  $tmp,$tmp,$tmp2\n\t"
            "pshufd   $tmp2,$tmp,0x1\n\t"
            "vpmulld  $tmp,$tmp,$tmp2\n\t"
            "movd     $tmp2,$src1\n\t"
            "vpmulld  $tmp2,$tmp,$tmp2\n\t"
            "movd     $dst,$tmp2\t! mul reduction8I" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vextracti64x4  $tmp3,$src2\n\t"
            "vpmulld  $tmp3,$tmp3,$src2\n\t"
            "vextracti128  $tmp,$tmp3\n\t"
            "vpmulld  $tmp,$tmp,$tmp3\n\t"
            "pshufd   $tmp2,$tmp,0xE\n\t"
            "vpmulld  $tmp,$tmp,$tmp2\n\t"
            "pshufd   $tmp2,$tmp,0x1\n\t"
            "vpmulld  $tmp,$tmp,$tmp2\n\t"
            "movd     $tmp2,$src1\n\t"
            "vpmulld  $tmp2,$tmp,$tmp2\n\t"
            "movd     $dst,$tmp2\t! mul reduction16I" %}
  ins_encode %{
    __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister);
    __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
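
// Packed 64-bit multiply (vpmullq) is only available with AVX-512DQ, hence
// the supports_avx512dq() predicates below.  These rules are LP64-only
// because the reduction result is returned in a 64-bit general register.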

#ifdef _LP64
instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd   $tmp2,$src2,0xE\n\t"
            "vpmullq  $tmp,$src2,$tmp2\n\t"
            "movdq    $tmp2,$src1\n\t"
            "vpmullq  $tmp2,$tmp,$tmp2\n\t"
            "movdq    $dst,$tmp2\t! mul reduction2L" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpmullq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdq($tmp2$$XMMRegister, $src1$$Register);
    __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti64x2  $tmp,$src2, 0x1\n\t"
            "vpmullq  $tmp2,$tmp,$src2\n\t"
            "pshufd   $tmp,$tmp2,0xE\n\t"
            "vpmullq  $tmp2,$tmp2,$tmp\n\t"
            "movdq    $tmp,$src1\n\t"
            "vpmullq  $tmp2,$tmp2,$tmp\n\t"
            "movdq    $dst,$tmp2\t! mul reduction4L" %}
  ins_encode %{
    __ vextracti64x2h($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti64x4  $tmp2,$src2\n\t"
            "vpmullq  $tmp2,$tmp2,$src2\n\t"
            "vextracti128  $tmp,$tmp2\n\t"
            "vpmullq  $tmp2,$tmp2,$tmp\n\t"
            "pshufd   $tmp,$tmp2,0xE\n\t"
            "vpmullq  $tmp2,$tmp2,$tmp\n\t"
            "movdq    $tmp,$src1\n\t"
            "vpmullq  $tmp2,$tmp2,$tmp\n\t"
            "movdq    $dst,$tmp2\t! mul reduction8L" %}
  ins_encode %{
    __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif

instruct rsmul2F_reduction(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "movdqu  $tmp,$src1\n\t"
            "mulss   $tmp,$src2\n\t"
            "pshufd  $tmp2,$src2,0x01\n\t"
            "mulss   $tmp,$tmp2\n\t"
            "movdqu  $dst,$tmp\t! mul reduction2F" %}
  ins_encode %{
    __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
    __ mulss($tmp$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vmulss  $tmp2,$src1,$src2\n\t"
            "pshufd  $tmp,$src2,0x01\n\t"
            "vmulss  $dst,$tmp2,$tmp\t! mul reduction2F" %}
  ins_encode %{
    __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "movdqu  $tmp,$src1\n\t"
            "mulss   $tmp,$src2\n\t"
            "pshufd  $tmp2,$src2,0x01\n\t"
            "mulss   $tmp,$tmp2\n\t"
            "pshufd  $tmp2,$src2,0x02\n\t"
            "mulss   $tmp,$tmp2\n\t"
            "pshufd  $tmp2,$src2,0x03\n\t"
            "mulss   $tmp,$tmp2\n\t"
            "movdqu  $dst,$tmp\t! mul reduction4F" %}
  ins_encode %{
    __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
    __ mulss($tmp$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vmulss  $tmp2,$src1,$src2\n\t"
            "pshufd  $tmp,$src2,0x01\n\t"
            "vmulss  $tmp2,$tmp2,$tmp\n\t"
            "pshufd  $tmp,$src2,0x02\n\t"
            "vmulss  $tmp2,$tmp2,$tmp\n\t"
            "pshufd  $tmp,$src2,0x03\n\t"
            "vmulss  $dst,$tmp2,$tmp\t! mul reduction4F" %}
  ins_encode %{
    __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vmulss  $tmp2,$src1,$src2\n\t"
            "pshufd  $tmp,$src2,0x01\n\t"
            "vmulss  $tmp2,$tmp2,$tmp\n\t"
            "pshufd  $tmp,$src2,0x02\n\t"
            "vmulss  $tmp2,$tmp2,$tmp\n\t"
            "pshufd  $tmp,$src2,0x03\n\t"
            "vmulss  $tmp2,$tmp2,$tmp\n\t"
            "vextractf128  $tmp3,$src2\n\t"
            "vmulss  $tmp2,$tmp2,$tmp3\n\t"
            "pshufd  $tmp,$tmp3,0x01\n\t"
            "vmulss  $tmp2,$tmp2,$tmp\n\t"
            "pshufd  $tmp,$tmp3,0x02\n\t"
            "vmulss  $tmp2,$tmp2,$tmp\n\t"
            "pshufd  $tmp,$tmp3,0x03\n\t"
            "vmulss  $dst,$tmp2,$tmp\t! mul reduction8F" %}
  ins_encode %{
    __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul16F_reduction_reg(regF dst, regF src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vmulss  $tmp2,$src1,$src2\n\t"
            "pshufd  $tmp,$src2,0x01\n\t"
            "vmulss  $tmp2,$tmp2,$tmp\n\t"
            "pshufd  $tmp,$src2,0x02\n\t"
            "vmulss  $tmp2,$tmp2,$tmp\n\t"
            "pshufd  $tmp,$src2,0x03\n\t"
            "vmulss  $tmp2,$tmp2,$tmp\n\t"
            "vextractf32x4  $tmp3,$src2, 0x1\n\t"
            "vmulss  $tmp2,$tmp2,$tmp3\n\t"
            "pshufd  $tmp,$tmp3,0x01\n\t"
            "vmulss  $tmp2,$tmp2,$tmp\n\t"
            "pshufd  $tmp,$tmp3,0x02\n\t"
            "vmulss  $tmp2,$tmp2,$tmp\n\t"
            "pshufd  $tmp,$tmp3,0x03\n\t"
            "vmulss  $tmp2,$tmp2,$tmp\n\t"
            "vextractf32x4  $tmp3,$src2, 0x2\n\t"
            "vmulss  $tmp2,$tmp2,$tmp3\n\t"
            "pshufd  $tmp,$tmp3,0x01\n\t"
            "vmulss  $tmp2,$tmp2,$tmp\n\t"
            "pshufd  $tmp,$tmp3,0x02\n\t"
            "vmulss  $tmp2,$tmp2,$tmp\n\t"
            "pshufd  $tmp,$tmp3,0x03\n\t"
            "vmulss  $tmp2,$tmp2,$tmp\n\t"
            "vextractf32x4  $tmp3,$src2, 0x3\n\t"
            "vmulss  $tmp2,$tmp2,$tmp3\n\t"
            "pshufd  $tmp,$tmp3,0x01\n\t"
            "vmulss  $tmp2,$tmp2,$tmp\n\t"
            "pshufd  $tmp,$tmp3,0x02\n\t"
            "vmulss  $tmp2,$tmp2,$tmp\n\t"
            "pshufd  $tmp,$tmp3,0x03\n\t"
            "vmulss  $dst,$tmp2,$tmp\t! mul reduction16F" %}
  ins_encode %{
    __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
    __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
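
// Double multiply reductions mirror the double add reductions: pshufd 0xE
// folds the high double of each 128-bit lane, and the 512-bit form walks
// the four lanes with vextractf64x2.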

instruct rsmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "movdqu  $tmp,$src1\n\t"
            "mulsd   $tmp,$src2\n\t"
            "pshufd  $dst,$src2,0xE\n\t"
            "mulsd   $dst,$tmp\t! mul reduction2D" %}
  ins_encode %{
    __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
    __ mulsd($tmp$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vmulsd  $tmp2,$src1,$src2\n\t"
            "pshufd  $tmp,$src2,0xE\n\t"
            "vmulsd  $dst,$tmp2,$tmp\t! mul reduction2D" %}
  ins_encode %{
    __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vmulsd  $tmp2,$src1,$src2\n\t"
            "pshufd  $tmp,$src2,0xE\n\t"
            "vmulsd  $tmp2,$tmp2,$tmp\n\t"
            "vextractf128  $tmp3,$src2\n\t"
            "vmulsd  $tmp2,$tmp2,$tmp3\n\t"
            "pshufd  $tmp,$tmp3,0xE\n\t"
            "vmulsd  $dst,$tmp2,$tmp\t! mul reduction4D" %}
  ins_encode %{
    __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
    __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul8D_reduction_reg(regD dst, regD src1, vecZ src2, regD tmp, regD tmp2, regD tmp3) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vmulsd  $tmp2,$src1,$src2\n\t"
            "pshufd  $tmp,$src2,0xE\n\t"
            "vmulsd  $tmp2,$tmp2,$tmp\n\t"
            "vextractf64x2  $tmp3,$src2, 0x1\n\t"
            "vmulsd  $tmp2,$tmp2,$tmp3\n\t"
            "pshufd  $tmp,$tmp3,0xE\n\t"
            "vmulsd  $tmp2,$tmp2,$tmp\n\t"
            "vextractf64x2  $tmp3,$src2, 0x2\n\t"
            "vmulsd  $tmp2,$tmp2,$tmp3\n\t"
            "pshufd  $tmp,$tmp3,0xE\n\t"
            "vmulsd  $tmp2,$tmp2,$tmp\n\t"
            "vextractf64x2  $tmp3,$src2, 0x3\n\t"
            "vmulsd  $tmp2,$tmp2,$tmp3\n\t"
            "pshufd  $tmp,$tmp3,0xE\n\t"
            "vmulsd  $dst,$tmp2,$tmp\t! mul reduction8D" %}
  ins_encode %{
    __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
    __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
    __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// ====================VECTOR ARITHMETIC=======================================

// --------------------------------- ADD --------------------------------------

// Bytes vector add
instruct vadd4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst src));
  format %{ "paddb   $dst,$src\t! add packed4B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst src));
  format %{ "paddb   $dst,$src\t! add packed8B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst src));
  format %{ "paddb   $dst,$src\t! add packed16B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
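
// The _mem flavors fold a LoadVector directly into the arithmetic
// instruction's memory operand, saving a separate vector load.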

instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector add
instruct vadd2S(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst src));
  format %{ "paddw   $dst,$src\t! add packed2S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw  $dst,$src1,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst src));
  format %{ "paddw   $dst,$src\t! add packed4S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw  $dst,$src1,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src));
  format %{ "paddw   $dst,$src\t! add packed8S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw  $dst,$src1,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw  $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw  $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw  $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw  $dst,$src1,$src2\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw  $dst,$src,$mem\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector add
instruct vadd2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst src));
  format %{ "paddd   $dst,$src\t! add packed2I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd  $dst,$src1,$src2\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst src));
  format %{ "paddd   $dst,$src\t! add packed4I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd  $dst,$src1,$src2\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd  $dst,$src,$mem\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd  $dst,$src1,$src2\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd  $dst,$src,$mem\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd  $dst,$src1,$src2\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd  $dst,$src,$mem\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector add
instruct vadd2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL dst src));
  format %{ "paddq   $dst,$src\t! add packed2L" %}
  ins_encode %{
    __ paddq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq  $dst,$src1,$src2\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq  $dst,$src,$mem\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq  $dst,$src1,$src2\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq  $dst,$src,$mem\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq  $dst,$src1,$src2\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq  $dst,$src,$mem\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector add
instruct vadd2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF dst src));
  format %{ "addps   $dst,$src\t! add packed2F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps  $dst,$src1,$src2\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF dst src));
  format %{ "addps   $dst,$src\t! add packed4F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps  $dst,$src1,$src2\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps  $dst,$src,$mem\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps  $dst,$src1,$src2\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps  $dst,$src,$mem\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps  $dst,$src1,$src2\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps  $dst,$src,$mem\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector add
instruct vadd2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD dst src));
  format %{ "addpd   $dst,$src\t! add packed2D" %}
  ins_encode %{
    __ addpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd  $dst,$src1,$src2\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd  $dst,$src,$mem\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd  $dst,$src1,$src2\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd  $dst,$src,$mem\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd  $dst,$src1,$src2\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd  $dst,$src,$mem\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- SUB --------------------------------------

// Bytes vector sub
instruct vsub4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src));
  format %{ "psubb   $dst,$src\t! sub packed4B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src));
  format %{ "psubb   $dst,$src\t! sub packed8B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst src));
  format %{ "psubb   $dst,$src\t! sub packed16B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
sub packed16B" %} 5615 ins_encode %{ 5616 int vector_len = 0; 5617 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5618 %} 5619 ins_pipe( pipe_slow ); 5620 %} 5621 5622 instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{ 5623 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 5624 match(Set dst (SubVB src (LoadVector mem))); 5625 format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %} 5626 ins_encode %{ 5627 int vector_len = 0; 5628 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5629 %} 5630 ins_pipe( pipe_slow ); 5631 %} 5632 5633 instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{ 5634 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 5635 match(Set dst (SubVB src1 src2)); 5636 format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %} 5637 ins_encode %{ 5638 int vector_len = 1; 5639 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5640 %} 5641 ins_pipe( pipe_slow ); 5642 %} 5643 5644 instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{ 5645 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 5646 match(Set dst (SubVB src (LoadVector mem))); 5647 format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %} 5648 ins_encode %{ 5649 int vector_len = 1; 5650 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5651 %} 5652 ins_pipe( pipe_slow ); 5653 %} 5654 5655 instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ 5656 predicate(UseAVX > 2 && n->as_Vector()->length() == 64); 5657 match(Set dst (SubVB src1 src2)); 5658 format %{ "vpsubb $dst,$src1,$src2\t! sub packed64B" %} 5659 ins_encode %{ 5660 int vector_len = 2; 5661 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5662 %} 5663 ins_pipe( pipe_slow ); 5664 %} 5665 5666 instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{ 5667 predicate(UseAVX > 2 && n->as_Vector()->length() == 64); 5668 match(Set dst (SubVB src (LoadVector mem))); 5669 format %{ "vpsubb $dst,$src,$mem\t! sub packed64B" %} 5670 ins_encode %{ 5671 int vector_len = 2; 5672 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5673 %} 5674 ins_pipe( pipe_slow ); 5675 %} 5676 5677 // Shorts/Chars vector sub 5678 instruct vsub2S(vecS dst, vecS src) %{ 5679 predicate(n->as_Vector()->length() == 2); 5680 match(Set dst (SubVS dst src)); 5681 format %{ "psubw $dst,$src\t! sub packed2S" %} 5682 ins_encode %{ 5683 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 5684 %} 5685 ins_pipe( pipe_slow ); 5686 %} 5687 5688 instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{ 5689 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5690 match(Set dst (SubVS src1 src2)); 5691 format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %} 5692 ins_encode %{ 5693 int vector_len = 0; 5694 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5695 %} 5696 ins_pipe( pipe_slow ); 5697 %} 5698 5699 instruct vsub4S(vecD dst, vecD src) %{ 5700 predicate(n->as_Vector()->length() == 4); 5701 match(Set dst (SubVS dst src)); 5702 format %{ "psubw $dst,$src\t! sub packed4S" %} 5703 ins_encode %{ 5704 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 5705 %} 5706 ins_pipe( pipe_slow ); 5707 %} 5708 5709 instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{ 5710 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5711 match(Set dst (SubVS src1 src2)); 5712 format %{ "vpsubw $dst,$src1,$src2\t! 
sub packed4S" %} 5713 ins_encode %{ 5714 int vector_len = 0; 5715 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5716 %} 5717 ins_pipe( pipe_slow ); 5718 %} 5719 5720 instruct vsub8S(vecX dst, vecX src) %{ 5721 predicate(n->as_Vector()->length() == 8); 5722 match(Set dst (SubVS dst src)); 5723 format %{ "psubw $dst,$src\t! sub packed8S" %} 5724 ins_encode %{ 5725 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 5726 %} 5727 ins_pipe( pipe_slow ); 5728 %} 5729 5730 instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{ 5731 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 5732 match(Set dst (SubVS src1 src2)); 5733 format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} 5734 ins_encode %{ 5735 int vector_len = 0; 5736 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5737 %} 5738 ins_pipe( pipe_slow ); 5739 %} 5740 5741 instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{ 5742 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 5743 match(Set dst (SubVS src (LoadVector mem))); 5744 format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %} 5745 ins_encode %{ 5746 int vector_len = 0; 5747 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5748 %} 5749 ins_pipe( pipe_slow ); 5750 %} 5751 5752 instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{ 5753 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 5754 match(Set dst (SubVS src1 src2)); 5755 format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} 5756 ins_encode %{ 5757 int vector_len = 1; 5758 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5759 %} 5760 ins_pipe( pipe_slow ); 5761 %} 5762 5763 instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{ 5764 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 5765 match(Set dst (SubVS src (LoadVector mem))); 5766 format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %} 5767 ins_encode %{ 5768 int vector_len = 1; 5769 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5770 %} 5771 ins_pipe( pipe_slow ); 5772 %} 5773 5774 instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ 5775 predicate(UseAVX > 2 && n->as_Vector()->length() == 32); 5776 match(Set dst (SubVS src1 src2)); 5777 format %{ "vpsubw $dst,$src1,$src2\t! sub packed32S" %} 5778 ins_encode %{ 5779 int vector_len = 2; 5780 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5781 %} 5782 ins_pipe( pipe_slow ); 5783 %} 5784 5785 instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{ 5786 predicate(UseAVX > 2 && n->as_Vector()->length() == 32); 5787 match(Set dst (SubVS src (LoadVector mem))); 5788 format %{ "vpsubw $dst,$src,$mem\t! sub packed32S" %} 5789 ins_encode %{ 5790 int vector_len = 2; 5791 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5792 %} 5793 ins_pipe( pipe_slow ); 5794 %} 5795 5796 // Integers vector sub 5797 instruct vsub2I(vecD dst, vecD src) %{ 5798 predicate(n->as_Vector()->length() == 2); 5799 match(Set dst (SubVI dst src)); 5800 format %{ "psubd $dst,$src\t! sub packed2I" %} 5801 ins_encode %{ 5802 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 5803 %} 5804 ins_pipe( pipe_slow ); 5805 %} 5806 5807 instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{ 5808 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5809 match(Set dst (SubVI src1 src2)); 5810 format %{ "vpsubd $dst,$src1,$src2\t! 
sub packed2I" %} 5811 ins_encode %{ 5812 int vector_len = 0; 5813 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5814 %} 5815 ins_pipe( pipe_slow ); 5816 %} 5817 5818 instruct vsub4I(vecX dst, vecX src) %{ 5819 predicate(n->as_Vector()->length() == 4); 5820 match(Set dst (SubVI dst src)); 5821 format %{ "psubd $dst,$src\t! sub packed4I" %} 5822 ins_encode %{ 5823 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 5824 %} 5825 ins_pipe( pipe_slow ); 5826 %} 5827 5828 instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{ 5829 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5830 match(Set dst (SubVI src1 src2)); 5831 format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %} 5832 ins_encode %{ 5833 int vector_len = 0; 5834 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5835 %} 5836 ins_pipe( pipe_slow ); 5837 %} 5838 5839 instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{ 5840 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5841 match(Set dst (SubVI src (LoadVector mem))); 5842 format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %} 5843 ins_encode %{ 5844 int vector_len = 0; 5845 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5846 %} 5847 ins_pipe( pipe_slow ); 5848 %} 5849 5850 instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{ 5851 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 5852 match(Set dst (SubVI src1 src2)); 5853 format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %} 5854 ins_encode %{ 5855 int vector_len = 1; 5856 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5857 %} 5858 ins_pipe( pipe_slow ); 5859 %} 5860 5861 instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{ 5862 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 5863 match(Set dst (SubVI src (LoadVector mem))); 5864 format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %} 5865 ins_encode %{ 5866 int vector_len = 1; 5867 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5868 %} 5869 ins_pipe( pipe_slow ); 5870 %} 5871 5872 instruct vsub16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ 5873 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 5874 match(Set dst (SubVI src1 src2)); 5875 format %{ "vpsubd $dst,$src1,$src2\t! sub packed16I" %} 5876 ins_encode %{ 5877 int vector_len = 2; 5878 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5879 %} 5880 ins_pipe( pipe_slow ); 5881 %} 5882 5883 instruct vsub16I_mem(vecZ dst, vecZ src, memory mem) %{ 5884 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 5885 match(Set dst (SubVI src (LoadVector mem))); 5886 format %{ "vpsubd $dst,$src,$mem\t! sub packed16I" %} 5887 ins_encode %{ 5888 int vector_len = 2; 5889 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5890 %} 5891 ins_pipe( pipe_slow ); 5892 %} 5893 5894 // Longs vector sub 5895 instruct vsub2L(vecX dst, vecX src) %{ 5896 predicate(n->as_Vector()->length() == 2); 5897 match(Set dst (SubVL dst src)); 5898 format %{ "psubq $dst,$src\t! sub packed2L" %} 5899 ins_encode %{ 5900 __ psubq($dst$$XMMRegister, $src$$XMMRegister); 5901 %} 5902 ins_pipe( pipe_slow ); 5903 %} 5904 5905 instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{ 5906 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5907 match(Set dst (SubVL src1 src2)); 5908 format %{ "vpsubq $dst,$src1,$src2\t! 
sub packed2L" %} 5909 ins_encode %{ 5910 int vector_len = 0; 5911 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5912 %} 5913 ins_pipe( pipe_slow ); 5914 %} 5915 5916 instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{ 5917 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5918 match(Set dst (SubVL src (LoadVector mem))); 5919 format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %} 5920 ins_encode %{ 5921 int vector_len = 0; 5922 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5923 %} 5924 ins_pipe( pipe_slow ); 5925 %} 5926 5927 instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{ 5928 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 5929 match(Set dst (SubVL src1 src2)); 5930 format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %} 5931 ins_encode %{ 5932 int vector_len = 1; 5933 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5934 %} 5935 ins_pipe( pipe_slow ); 5936 %} 5937 5938 instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{ 5939 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 5940 match(Set dst (SubVL src (LoadVector mem))); 5941 format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %} 5942 ins_encode %{ 5943 int vector_len = 1; 5944 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5945 %} 5946 ins_pipe( pipe_slow ); 5947 %} 5948 5949 instruct vsub8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ 5950 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 5951 match(Set dst (SubVL src1 src2)); 5952 format %{ "vpsubq $dst,$src1,$src2\t! sub packed8L" %} 5953 ins_encode %{ 5954 int vector_len = 2; 5955 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5956 %} 5957 ins_pipe( pipe_slow ); 5958 %} 5959 5960 instruct vsub8L_mem(vecZ dst, vecZ src, memory mem) %{ 5961 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 5962 match(Set dst (SubVL src (LoadVector mem))); 5963 format %{ "vpsubq $dst,$src,$mem\t! sub packed8L" %} 5964 ins_encode %{ 5965 int vector_len = 2; 5966 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5967 %} 5968 ins_pipe( pipe_slow ); 5969 %} 5970 5971 // Floats vector sub 5972 instruct vsub2F(vecD dst, vecD src) %{ 5973 predicate(n->as_Vector()->length() == 2); 5974 match(Set dst (SubVF dst src)); 5975 format %{ "subps $dst,$src\t! sub packed2F" %} 5976 ins_encode %{ 5977 __ subps($dst$$XMMRegister, $src$$XMMRegister); 5978 %} 5979 ins_pipe( pipe_slow ); 5980 %} 5981 5982 instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{ 5983 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5984 match(Set dst (SubVF src1 src2)); 5985 format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %} 5986 ins_encode %{ 5987 int vector_len = 0; 5988 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5989 %} 5990 ins_pipe( pipe_slow ); 5991 %} 5992 5993 instruct vsub4F(vecX dst, vecX src) %{ 5994 predicate(n->as_Vector()->length() == 4); 5995 match(Set dst (SubVF dst src)); 5996 format %{ "subps $dst,$src\t! sub packed4F" %} 5997 ins_encode %{ 5998 __ subps($dst$$XMMRegister, $src$$XMMRegister); 5999 %} 6000 ins_pipe( pipe_slow ); 6001 %} 6002 6003 instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{ 6004 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6005 match(Set dst (SubVF src1 src2)); 6006 format %{ "vsubps $dst,$src1,$src2\t! 
sub packed4F" %} 6007 ins_encode %{ 6008 int vector_len = 0; 6009 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6010 %} 6011 ins_pipe( pipe_slow ); 6012 %} 6013 6014 instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{ 6015 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6016 match(Set dst (SubVF src (LoadVector mem))); 6017 format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %} 6018 ins_encode %{ 6019 int vector_len = 0; 6020 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6021 %} 6022 ins_pipe( pipe_slow ); 6023 %} 6024 6025 instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{ 6026 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6027 match(Set dst (SubVF src1 src2)); 6028 format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %} 6029 ins_encode %{ 6030 int vector_len = 1; 6031 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6032 %} 6033 ins_pipe( pipe_slow ); 6034 %} 6035 6036 instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{ 6037 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6038 match(Set dst (SubVF src (LoadVector mem))); 6039 format %{ "vsubps $dst,$src,$mem\t! sub packed8F" %} 6040 ins_encode %{ 6041 int vector_len = 1; 6042 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6043 %} 6044 ins_pipe( pipe_slow ); 6045 %} 6046 6047 instruct vsub16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6048 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 6049 match(Set dst (SubVF src1 src2)); 6050 format %{ "vsubps $dst,$src1,$src2\t! sub packed16F" %} 6051 ins_encode %{ 6052 int vector_len = 2; 6053 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6054 %} 6055 ins_pipe( pipe_slow ); 6056 %} 6057 6058 instruct vsub16F_mem(vecZ dst, vecZ src, memory mem) %{ 6059 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 6060 match(Set dst (SubVF src (LoadVector mem))); 6061 format %{ "vsubps $dst,$src,$mem\t! sub packed16F" %} 6062 ins_encode %{ 6063 int vector_len = 2; 6064 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6065 %} 6066 ins_pipe( pipe_slow ); 6067 %} 6068 6069 // Doubles vector sub 6070 instruct vsub2D(vecX dst, vecX src) %{ 6071 predicate(n->as_Vector()->length() == 2); 6072 match(Set dst (SubVD dst src)); 6073 format %{ "subpd $dst,$src\t! sub packed2D" %} 6074 ins_encode %{ 6075 __ subpd($dst$$XMMRegister, $src$$XMMRegister); 6076 %} 6077 ins_pipe( pipe_slow ); 6078 %} 6079 6080 instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{ 6081 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6082 match(Set dst (SubVD src1 src2)); 6083 format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %} 6084 ins_encode %{ 6085 int vector_len = 0; 6086 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6087 %} 6088 ins_pipe( pipe_slow ); 6089 %} 6090 6091 instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{ 6092 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6093 match(Set dst (SubVD src (LoadVector mem))); 6094 format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %} 6095 ins_encode %{ 6096 int vector_len = 0; 6097 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6098 %} 6099 ins_pipe( pipe_slow ); 6100 %} 6101 6102 instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{ 6103 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6104 match(Set dst (SubVD src1 src2)); 6105 format %{ "vsubpd $dst,$src1,$src2\t! 
sub packed4D" %} 6106 ins_encode %{ 6107 int vector_len = 1; 6108 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6109 %} 6110 ins_pipe( pipe_slow ); 6111 %} 6112 6113 instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{ 6114 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6115 match(Set dst (SubVD src (LoadVector mem))); 6116 format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %} 6117 ins_encode %{ 6118 int vector_len = 1; 6119 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6120 %} 6121 ins_pipe( pipe_slow ); 6122 %} 6123 6124 instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6125 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 6126 match(Set dst (SubVD src1 src2)); 6127 format %{ "vsubpd $dst,$src1,$src2\t! sub packed8D" %} 6128 ins_encode %{ 6129 int vector_len = 2; 6130 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6131 %} 6132 ins_pipe( pipe_slow ); 6133 %} 6134 6135 instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{ 6136 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 6137 match(Set dst (SubVD src (LoadVector mem))); 6138 format %{ "vsubpd $dst,$src,$mem\t! sub packed8D" %} 6139 ins_encode %{ 6140 int vector_len = 2; 6141 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6142 %} 6143 ins_pipe( pipe_slow ); 6144 %} 6145 6146 // --------------------------------- MUL -------------------------------------- 6147 6148 // Shorts/Chars vector mul 6149 instruct vmul2S(vecS dst, vecS src) %{ 6150 predicate(n->as_Vector()->length() == 2); 6151 match(Set dst (MulVS dst src)); 6152 format %{ "pmullw $dst,$src\t! mul packed2S" %} 6153 ins_encode %{ 6154 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 6155 %} 6156 ins_pipe( pipe_slow ); 6157 %} 6158 6159 instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{ 6160 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6161 match(Set dst (MulVS src1 src2)); 6162 format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} 6163 ins_encode %{ 6164 int vector_len = 0; 6165 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6166 %} 6167 ins_pipe( pipe_slow ); 6168 %} 6169 6170 instruct vmul4S(vecD dst, vecD src) %{ 6171 predicate(n->as_Vector()->length() == 4); 6172 match(Set dst (MulVS dst src)); 6173 format %{ "pmullw $dst,$src\t! mul packed4S" %} 6174 ins_encode %{ 6175 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 6176 %} 6177 ins_pipe( pipe_slow ); 6178 %} 6179 6180 instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{ 6181 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6182 match(Set dst (MulVS src1 src2)); 6183 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 6184 ins_encode %{ 6185 int vector_len = 0; 6186 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6187 %} 6188 ins_pipe( pipe_slow ); 6189 %} 6190 6191 instruct vmul8S(vecX dst, vecX src) %{ 6192 predicate(n->as_Vector()->length() == 8); 6193 match(Set dst (MulVS dst src)); 6194 format %{ "pmullw $dst,$src\t! mul packed8S" %} 6195 ins_encode %{ 6196 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 6197 %} 6198 ins_pipe( pipe_slow ); 6199 %} 6200 6201 instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{ 6202 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6203 match(Set dst (MulVS src1 src2)); 6204 format %{ "vpmullw $dst,$src1,$src2\t! 
mul packed8S" %} 6205 ins_encode %{ 6206 int vector_len = 0; 6207 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6208 %} 6209 ins_pipe( pipe_slow ); 6210 %} 6211 6212 instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{ 6213 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6214 match(Set dst (MulVS src (LoadVector mem))); 6215 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} 6216 ins_encode %{ 6217 int vector_len = 0; 6218 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6219 %} 6220 ins_pipe( pipe_slow ); 6221 %} 6222 6223 instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{ 6224 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 6225 match(Set dst (MulVS src1 src2)); 6226 format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} 6227 ins_encode %{ 6228 int vector_len = 1; 6229 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6230 %} 6231 ins_pipe( pipe_slow ); 6232 %} 6233 6234 instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{ 6235 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 6236 match(Set dst (MulVS src (LoadVector mem))); 6237 format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} 6238 ins_encode %{ 6239 int vector_len = 1; 6240 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6241 %} 6242 ins_pipe( pipe_slow ); 6243 %} 6244 6245 instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6246 predicate(UseAVX > 2 && n->as_Vector()->length() == 32); 6247 match(Set dst (MulVS src1 src2)); 6248 format %{ "vpmullw $dst,$src1,$src2\t! mul packed32S" %} 6249 ins_encode %{ 6250 int vector_len = 2; 6251 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6252 %} 6253 ins_pipe( pipe_slow ); 6254 %} 6255 6256 instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{ 6257 predicate(UseAVX > 2 && n->as_Vector()->length() == 32); 6258 match(Set dst (MulVS src (LoadVector mem))); 6259 format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %} 6260 ins_encode %{ 6261 int vector_len = 2; 6262 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6263 %} 6264 ins_pipe( pipe_slow ); 6265 %} 6266 6267 // Integers vector mul (sse4_1) 6268 instruct vmul2I(vecD dst, vecD src) %{ 6269 predicate(UseSSE > 3 && n->as_Vector()->length() == 2); 6270 match(Set dst (MulVI dst src)); 6271 format %{ "pmulld $dst,$src\t! mul packed2I" %} 6272 ins_encode %{ 6273 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 6274 %} 6275 ins_pipe( pipe_slow ); 6276 %} 6277 6278 instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{ 6279 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6280 match(Set dst (MulVI src1 src2)); 6281 format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %} 6282 ins_encode %{ 6283 int vector_len = 0; 6284 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6285 %} 6286 ins_pipe( pipe_slow ); 6287 %} 6288 6289 instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{ 6290 predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq()); 6291 match(Set dst (MulVL src1 src2)); 6292 format %{ "vpmullq $dst,$src1,$src2\t! 
mul packed2L" %} 6293 ins_encode %{ 6294 int vector_len = 0; 6295 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6296 %} 6297 ins_pipe( pipe_slow ); 6298 %} 6299 6300 instruct vmul4I(vecX dst, vecX src) %{ 6301 predicate(UseSSE > 3 && n->as_Vector()->length() == 4); 6302 match(Set dst (MulVI dst src)); 6303 format %{ "pmulld $dst,$src\t! mul packed4I" %} 6304 ins_encode %{ 6305 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 6306 %} 6307 ins_pipe( pipe_slow ); 6308 %} 6309 6310 instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{ 6311 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6312 match(Set dst (MulVI src1 src2)); 6313 format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %} 6314 ins_encode %{ 6315 int vector_len = 0; 6316 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6317 %} 6318 ins_pipe( pipe_slow ); 6319 %} 6320 6321 instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{ 6322 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6323 match(Set dst (MulVI src (LoadVector mem))); 6324 format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %} 6325 ins_encode %{ 6326 int vector_len = 0; 6327 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6328 %} 6329 ins_pipe( pipe_slow ); 6330 %} 6331 6332 instruct vmul4L_reg(vecY dst, vecY src1, vecY src2) %{ 6333 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq()); 6334 match(Set dst (MulVL src1 src2)); 6335 format %{ "vpmullq $dst,$src1,$src2\t! mul packed4L" %} 6336 ins_encode %{ 6337 int vector_len = 1; 6338 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6339 %} 6340 ins_pipe( pipe_slow ); 6341 %} 6342 6343 instruct vmul4L_mem(vecY dst, vecY src, memory mem) %{ 6344 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq()); 6345 match(Set dst (MulVL src (LoadVector mem))); 6346 format %{ "vpmullq $dst,$src,$mem\t! mul packed4L" %} 6347 ins_encode %{ 6348 int vector_len = 1; 6349 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6350 %} 6351 ins_pipe( pipe_slow ); 6352 %} 6353 6354 instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{ 6355 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 6356 match(Set dst (MulVI src1 src2)); 6357 format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %} 6358 ins_encode %{ 6359 int vector_len = 1; 6360 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6361 %} 6362 ins_pipe( pipe_slow ); 6363 %} 6364 6365 instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6366 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq()); 6367 match(Set dst (MulVL src1 src2)); 6368 format %{ "vpmullq $dst,$src1,$src2\t! mul packed8L" %} 6369 ins_encode %{ 6370 int vector_len = 2; 6371 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6372 %} 6373 ins_pipe( pipe_slow ); 6374 %} 6375 6376 instruct vmul16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6377 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 6378 match(Set dst (MulVI src1 src2)); 6379 format %{ "vpmulld $dst,$src1,$src2\t! 
mul packed16I" %} 6380 ins_encode %{ 6381 int vector_len = 2; 6382 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6383 %} 6384 ins_pipe( pipe_slow ); 6385 %} 6386 6387 instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{ 6388 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 6389 match(Set dst (MulVI src (LoadVector mem))); 6390 format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %} 6391 ins_encode %{ 6392 int vector_len = 1; 6393 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6394 %} 6395 ins_pipe( pipe_slow ); 6396 %} 6397 6398 instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{ 6399 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq()); 6400 match(Set dst (MulVL src (LoadVector mem))); 6401 format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %} 6402 ins_encode %{ 6403 int vector_len = 2; 6404 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6405 %} 6406 ins_pipe( pipe_slow ); 6407 %} 6408 6409 instruct vmul16I_mem(vecZ dst, vecZ src, memory mem) %{ 6410 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 6411 match(Set dst (MulVI src (LoadVector mem))); 6412 format %{ "vpmulld $dst,$src,$mem\t! mul packed16I" %} 6413 ins_encode %{ 6414 int vector_len = 2; 6415 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6416 %} 6417 ins_pipe( pipe_slow ); 6418 %} 6419 6420 // Floats vector mul 6421 instruct vmul2F(vecD dst, vecD src) %{ 6422 predicate(n->as_Vector()->length() == 2); 6423 match(Set dst (MulVF dst src)); 6424 format %{ "mulps $dst,$src\t! mul packed2F" %} 6425 ins_encode %{ 6426 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 6427 %} 6428 ins_pipe( pipe_slow ); 6429 %} 6430 6431 instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{ 6432 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6433 match(Set dst (MulVF src1 src2)); 6434 format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %} 6435 ins_encode %{ 6436 int vector_len = 0; 6437 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6438 %} 6439 ins_pipe( pipe_slow ); 6440 %} 6441 6442 instruct vmul4F(vecX dst, vecX src) %{ 6443 predicate(n->as_Vector()->length() == 4); 6444 match(Set dst (MulVF dst src)); 6445 format %{ "mulps $dst,$src\t! mul packed4F" %} 6446 ins_encode %{ 6447 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 6448 %} 6449 ins_pipe( pipe_slow ); 6450 %} 6451 6452 instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{ 6453 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6454 match(Set dst (MulVF src1 src2)); 6455 format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %} 6456 ins_encode %{ 6457 int vector_len = 0; 6458 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6459 %} 6460 ins_pipe( pipe_slow ); 6461 %} 6462 6463 instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{ 6464 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6465 match(Set dst (MulVF src (LoadVector mem))); 6466 format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %} 6467 ins_encode %{ 6468 int vector_len = 0; 6469 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6470 %} 6471 ins_pipe( pipe_slow ); 6472 %} 6473 6474 instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{ 6475 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6476 match(Set dst (MulVF src1 src2)); 6477 format %{ "vmulps $dst,$src1,$src2\t! 
mul packed8F" %} 6478 ins_encode %{ 6479 int vector_len = 1; 6480 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6481 %} 6482 ins_pipe( pipe_slow ); 6483 %} 6484 6485 instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{ 6486 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6487 match(Set dst (MulVF src (LoadVector mem))); 6488 format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %} 6489 ins_encode %{ 6490 int vector_len = 1; 6491 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6492 %} 6493 ins_pipe( pipe_slow ); 6494 %} 6495 6496 instruct vmul16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6497 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 6498 match(Set dst (MulVF src1 src2)); 6499 format %{ "vmulps $dst,$src1,$src2\t! mul packed16F" %} 6500 ins_encode %{ 6501 int vector_len = 2; 6502 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6503 %} 6504 ins_pipe( pipe_slow ); 6505 %} 6506 6507 instruct vmul16F_mem(vecZ dst, vecZ src, memory mem) %{ 6508 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 6509 match(Set dst (MulVF src (LoadVector mem))); 6510 format %{ "vmulps $dst,$src,$mem\t! mul packed16F" %} 6511 ins_encode %{ 6512 int vector_len = 2; 6513 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6514 %} 6515 ins_pipe( pipe_slow ); 6516 %} 6517 6518 // Doubles vector mul 6519 instruct vmul2D(vecX dst, vecX src) %{ 6520 predicate(n->as_Vector()->length() == 2); 6521 match(Set dst (MulVD dst src)); 6522 format %{ "mulpd $dst,$src\t! mul packed2D" %} 6523 ins_encode %{ 6524 __ mulpd($dst$$XMMRegister, $src$$XMMRegister); 6525 %} 6526 ins_pipe( pipe_slow ); 6527 %} 6528 6529 instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{ 6530 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6531 match(Set dst (MulVD src1 src2)); 6532 format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %} 6533 ins_encode %{ 6534 int vector_len = 0; 6535 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6536 %} 6537 ins_pipe( pipe_slow ); 6538 %} 6539 6540 instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{ 6541 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6542 match(Set dst (MulVD src (LoadVector mem))); 6543 format %{ "vmulpd $dst,$src,$mem\t! mul packed2D" %} 6544 ins_encode %{ 6545 int vector_len = 0; 6546 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6547 %} 6548 ins_pipe( pipe_slow ); 6549 %} 6550 6551 instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{ 6552 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6553 match(Set dst (MulVD src1 src2)); 6554 format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %} 6555 ins_encode %{ 6556 int vector_len = 1; 6557 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6558 %} 6559 ins_pipe( pipe_slow ); 6560 %} 6561 6562 instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{ 6563 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6564 match(Set dst (MulVD src (LoadVector mem))); 6565 format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %} 6566 ins_encode %{ 6567 int vector_len = 1; 6568 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6569 %} 6570 ins_pipe( pipe_slow ); 6571 %} 6572 6573 instruct vmul8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6574 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 6575 match(Set dst (MulVD src1 src2)); 6576 format %{ "vmulpd $dst k0,$src1,$src2\t! 
mul packed8D" %} 6577 ins_encode %{ 6578 int vector_len = 2; 6579 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6580 %} 6581 ins_pipe( pipe_slow ); 6582 %} 6583 6584 instruct vmul8D_mem(vecZ dst, vecZ src, memory mem) %{ 6585 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 6586 match(Set dst (MulVD src (LoadVector mem))); 6587 format %{ "vmulpd $dst k0,$src,$mem\t! mul packed8D" %} 6588 ins_encode %{ 6589 int vector_len = 2; 6590 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6591 %} 6592 ins_pipe( pipe_slow ); 6593 %} 6594 6595 // --------------------------------- DIV -------------------------------------- 6596 6597 // Floats vector div 6598 instruct vdiv2F(vecD dst, vecD src) %{ 6599 predicate(n->as_Vector()->length() == 2); 6600 match(Set dst (DivVF dst src)); 6601 format %{ "divps $dst,$src\t! div packed2F" %} 6602 ins_encode %{ 6603 __ divps($dst$$XMMRegister, $src$$XMMRegister); 6604 %} 6605 ins_pipe( pipe_slow ); 6606 %} 6607 6608 instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{ 6609 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6610 match(Set dst (DivVF src1 src2)); 6611 format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %} 6612 ins_encode %{ 6613 int vector_len = 0; 6614 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6615 %} 6616 ins_pipe( pipe_slow ); 6617 %} 6618 6619 instruct vdiv4F(vecX dst, vecX src) %{ 6620 predicate(n->as_Vector()->length() == 4); 6621 match(Set dst (DivVF dst src)); 6622 format %{ "divps $dst,$src\t! div packed4F" %} 6623 ins_encode %{ 6624 __ divps($dst$$XMMRegister, $src$$XMMRegister); 6625 %} 6626 ins_pipe( pipe_slow ); 6627 %} 6628 6629 instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{ 6630 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6631 match(Set dst (DivVF src1 src2)); 6632 format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %} 6633 ins_encode %{ 6634 int vector_len = 0; 6635 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6636 %} 6637 ins_pipe( pipe_slow ); 6638 %} 6639 6640 instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{ 6641 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6642 match(Set dst (DivVF src (LoadVector mem))); 6643 format %{ "vdivps $dst,$src,$mem\t! div packed4F" %} 6644 ins_encode %{ 6645 int vector_len = 0; 6646 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6647 %} 6648 ins_pipe( pipe_slow ); 6649 %} 6650 6651 instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{ 6652 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6653 match(Set dst (DivVF src1 src2)); 6654 format %{ "vdivps $dst,$src1,$src2\t! div packed8F" %} 6655 ins_encode %{ 6656 int vector_len = 1; 6657 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6658 %} 6659 ins_pipe( pipe_slow ); 6660 %} 6661 6662 instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{ 6663 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6664 match(Set dst (DivVF src (LoadVector mem))); 6665 format %{ "vdivps $dst,$src,$mem\t! div packed8F" %} 6666 ins_encode %{ 6667 int vector_len = 1; 6668 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6669 %} 6670 ins_pipe( pipe_slow ); 6671 %} 6672 6673 instruct vdiv16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6674 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 6675 match(Set dst (DivVF src1 src2)); 6676 format %{ "vdivps $dst,$src1,$src2\t! 
div packed16F" %} 6677 ins_encode %{ 6678 int vector_len = 2; 6679 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6680 %} 6681 ins_pipe( pipe_slow ); 6682 %} 6683 6684 instruct vdiv16F_mem(vecZ dst, vecZ src, memory mem) %{ 6685 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 6686 match(Set dst (DivVF src (LoadVector mem))); 6687 format %{ "vdivps $dst,$src,$mem\t! div packed16F" %} 6688 ins_encode %{ 6689 int vector_len = 2; 6690 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6691 %} 6692 ins_pipe( pipe_slow ); 6693 %} 6694 6695 // Doubles vector div 6696 instruct vdiv2D(vecX dst, vecX src) %{ 6697 predicate(n->as_Vector()->length() == 2); 6698 match(Set dst (DivVD dst src)); 6699 format %{ "divpd $dst,$src\t! div packed2D" %} 6700 ins_encode %{ 6701 __ divpd($dst$$XMMRegister, $src$$XMMRegister); 6702 %} 6703 ins_pipe( pipe_slow ); 6704 %} 6705 6706 instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{ 6707 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6708 match(Set dst (DivVD src1 src2)); 6709 format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %} 6710 ins_encode %{ 6711 int vector_len = 0; 6712 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6713 %} 6714 ins_pipe( pipe_slow ); 6715 %} 6716 6717 instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{ 6718 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6719 match(Set dst (DivVD src (LoadVector mem))); 6720 format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %} 6721 ins_encode %{ 6722 int vector_len = 0; 6723 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6724 %} 6725 ins_pipe( pipe_slow ); 6726 %} 6727 6728 instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{ 6729 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6730 match(Set dst (DivVD src1 src2)); 6731 format %{ "vdivpd $dst,$src1,$src2\t! div packed4D" %} 6732 ins_encode %{ 6733 int vector_len = 1; 6734 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6735 %} 6736 ins_pipe( pipe_slow ); 6737 %} 6738 6739 instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{ 6740 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6741 match(Set dst (DivVD src (LoadVector mem))); 6742 format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %} 6743 ins_encode %{ 6744 int vector_len = 1; 6745 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6746 %} 6747 ins_pipe( pipe_slow ); 6748 %} 6749 6750 instruct vdiv8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6751 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 6752 match(Set dst (DivVD src1 src2)); 6753 format %{ "vdivpd $dst,$src1,$src2\t! div packed8D" %} 6754 ins_encode %{ 6755 int vector_len = 2; 6756 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6757 %} 6758 ins_pipe( pipe_slow ); 6759 %} 6760 6761 instruct vdiv8D_mem(vecZ dst, vecZ src, memory mem) %{ 6762 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 6763 match(Set dst (DivVD src (LoadVector mem))); 6764 format %{ "vdivpd $dst,$src,$mem\t! div packed8D" %} 6765 ins_encode %{ 6766 int vector_len = 2; 6767 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6768 %} 6769 ins_pipe( pipe_slow ); 6770 %} 6771 6772 // ------------------------------ Shift --------------------------------------- 6773 6774 // Left and right shift count vectors are the same on x86 6775 // (only lowest bits of xmm reg are used for count). 
instruct vshiftcnt(vecS dst, rRegI cnt) %{
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "movd $dst,$cnt\t! load shift count" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $cnt$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ LeftShift -----------------------------------

// Shorts/Chars vector left shift
instruct vsll2S(vecS dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_imm(vecS dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector left shift
instruct vsll2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector left shift
instruct vsll2L(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8L_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ----------------------- LogicalRightShift -----------------------------------

// Shorts vector logical right shift produces incorrect Java result
// for negative data because Java code converts short values into ints with
// sign extension before a shift. But char vectors are fine since chars are
// unsigned values.

instruct vsrl2S(vecS dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_imm(vecS dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector logical right shift
instruct vsrl2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector logical right shift
instruct vsrl2L(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8L_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------- ArithmeticRightShift -----------------------------------

// Shorts/Chars vector arithmetic right shift
instruct vsra2S(vecS dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_imm(vecS dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector arithmetic right shift
instruct vsra2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// There are no vector arithmetic right shift instructions for longs.
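// Before AVX-512, which adds the EVEX-encoded vpsraq, x86 has no packed 64-bit
// arithmetic right shift, which is why none is matched above. As an
// illustrative sketch only (scalar Java, not matcher code), the operation can
// be recovered from the logical shift with the usual sign-fix identity:
//
//   long m = 1L << (63 - n);          // mask at the shifted-in sign position
//   long sra = ((x >>> n) ^ m) - m;   // equals x >> n for 0 <= n < 64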

// --------------------------------- AND --------------------------------------

instruct vand4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (4 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (8 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (16 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- OR ---------------------------------------

instruct vor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (4 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (8 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (16 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- XOR --------------------------------------

instruct vxor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}