//
// Copyright (c) 2011, 2014, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers.  512-bit registers or 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre-EVEX enabled architectures:
//   XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX enabled architectures:
//   XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
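//
// As an illustration (a restatement of the scheme above, using definitions
// that appear below, not an addition to it), the first slice definition
//   reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
// names word (a) of xmm0: Save-On-Call under both the register-allocator
// and C conventions, spilled as a float (Op_RegF), hardware encoding 0,
// backed by the first 32-bit VMReg slot of xmm0.  XMM0b through XMM0p are
// then the next(1) through next(15) slots of the same physical register.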
//
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM31 preserved across function calls
//              XMM0-XMM3 might hold parameters

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));
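// Note: consistent with the ABI comments above, XMM0-XMM5 are SOC under
// both conventions in every branch, while the #ifdef _WIN64 definitions
// further below mark XMM6 and up as SOE for the C convention (preserved
// across Windows calls) and the #else branch leaves them SOC.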
reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

#ifdef _WIN64

reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(15));

reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(15));

#else // _WIN64

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

#ifdef _LP64

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
xmm23->as_VMReg()->next(8)); 924 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9)); 925 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10)); 926 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11)); 927 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12)); 928 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13)); 929 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14)); 930 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15)); 931 932 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()); 933 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1)); 934 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2)); 935 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3)); 936 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4)); 937 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5)); 938 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6)); 939 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7)); 940 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8)); 941 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9)); 942 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10)); 943 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11)); 944 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12)); 945 reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13)); 946 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14)); 947 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15)); 948 949 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()); 950 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1)); 951 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2)); 952 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3)); 953 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4)); 954 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5)); 955 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6)); 956 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7)); 957 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8)); 958 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9)); 959 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10)); 960 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11)); 961 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12)); 962 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13)); 963 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14)); 964 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15)); 965 966 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()); 967 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1)); 968 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2)); 969 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3)); 970 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4)); 971 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5)); 972 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6)); 973 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7)); 974 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8)); 975 reg_def XMM26j( SOC, SOC, Op_RegF, 26, 
xmm26->as_VMReg()->next(9)); 976 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10)); 977 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11)); 978 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12)); 979 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13)); 980 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14)); 981 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15)); 982 983 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()); 984 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1)); 985 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2)); 986 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3)); 987 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4)); 988 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5)); 989 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6)); 990 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7)); 991 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8)); 992 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9)); 993 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10)); 994 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11)); 995 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12)); 996 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13)); 997 reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14)); 998 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15)); 999 1000 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()); 1001 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1)); 1002 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2)); 1003 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3)); 1004 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4)); 1005 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5)); 1006 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6)); 1007 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7)); 1008 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8)); 1009 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9)); 1010 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10)); 1011 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11)); 1012 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12)); 1013 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13)); 1014 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14)); 1015 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15)); 1016 1017 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()); 1018 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1)); 1019 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2)); 1020 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3)); 1021 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4)); 1022 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5)); 1023 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6)); 1024 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7)); 1025 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8)); 1026 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9)); 1027 reg_def XMM29k( SOC, SOC, Op_RegF, 
29, xmm29->as_VMReg()->next(10)); 1028 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11)); 1029 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12)); 1030 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13)); 1031 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14)); 1032 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15)); 1033 1034 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()); 1035 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1)); 1036 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2)); 1037 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3)); 1038 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4)); 1039 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5)); 1040 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6)); 1041 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7)); 1042 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8)); 1043 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9)); 1044 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10)); 1045 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11)); 1046 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12)); 1047 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13)); 1048 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14)); 1049 reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15)); 1050 1051 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()); 1052 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1)); 1053 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2)); 1054 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3)); 1055 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4)); 1056 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5)); 1057 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6)); 1058 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7)); 1059 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8)); 1060 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9)); 1061 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10)); 1062 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11)); 1063 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12)); 1064 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13)); 1065 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14)); 1066 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15)); 1067 1068 #endif // _LP64 1069 1070 #endif // _WIN64 1071 1072 #ifdef _LP64 1073 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad()); 1074 #else 1075 reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad()); 1076 #endif // _LP64 1077 1078 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1079 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1080 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1081 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1082 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, 
XMM4o, XMM4p, 1083 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1084 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1085 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1086 #ifdef _LP64 1087 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1088 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1089 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1090 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1091 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1092 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1093 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1094 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1095 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 1096 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 1097 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 1098 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 1099 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 1100 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 1101 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 1102 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 1103 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 1104 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 1105 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 1106 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 1107 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 1108 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 1109 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1110 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1111 #endif 1112 ); 1113 1114 // flags 
allocation class should be last. 1115 alloc_class chunk2(RFLAGS); 1116 1117 // Singleton class for condition codes 1118 reg_class int_flags(RFLAGS); 1119 1120 // Class for pre evex float registers 1121 reg_class float_reg_legacy(XMM0, 1122 XMM1, 1123 XMM2, 1124 XMM3, 1125 XMM4, 1126 XMM5, 1127 XMM6, 1128 XMM7 1129 #ifdef _LP64 1130 ,XMM8, 1131 XMM9, 1132 XMM10, 1133 XMM11, 1134 XMM12, 1135 XMM13, 1136 XMM14, 1137 XMM15 1138 #endif 1139 ); 1140 1141 // Class for evex float registers 1142 reg_class float_reg_evex(XMM0, 1143 XMM1, 1144 XMM2, 1145 XMM3, 1146 XMM4, 1147 XMM5, 1148 XMM6, 1149 XMM7 1150 #ifdef _LP64 1151 ,XMM8, 1152 XMM9, 1153 XMM10, 1154 XMM11, 1155 XMM12, 1156 XMM13, 1157 XMM14, 1158 XMM15, 1159 XMM16, 1160 XMM17, 1161 XMM18, 1162 XMM19, 1163 XMM20, 1164 XMM21, 1165 XMM22, 1166 XMM23, 1167 XMM24, 1168 XMM25, 1169 XMM26, 1170 XMM27, 1171 XMM28, 1172 XMM29, 1173 XMM30, 1174 XMM31 1175 #endif 1176 ); 1177 1178 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} ); 1179 1180 // Class for pre evex double registers 1181 reg_class double_reg_legacy(XMM0, XMM0b, 1182 XMM1, XMM1b, 1183 XMM2, XMM2b, 1184 XMM3, XMM3b, 1185 XMM4, XMM4b, 1186 XMM5, XMM5b, 1187 XMM6, XMM6b, 1188 XMM7, XMM7b 1189 #ifdef _LP64 1190 ,XMM8, XMM8b, 1191 XMM9, XMM9b, 1192 XMM10, XMM10b, 1193 XMM11, XMM11b, 1194 XMM12, XMM12b, 1195 XMM13, XMM13b, 1196 XMM14, XMM14b, 1197 XMM15, XMM15b 1198 #endif 1199 ); 1200 1201 // Class for evex double registers 1202 reg_class double_reg_evex(XMM0, XMM0b, 1203 XMM1, XMM1b, 1204 XMM2, XMM2b, 1205 XMM3, XMM3b, 1206 XMM4, XMM4b, 1207 XMM5, XMM5b, 1208 XMM6, XMM6b, 1209 XMM7, XMM7b 1210 #ifdef _LP64 1211 ,XMM8, XMM8b, 1212 XMM9, XMM9b, 1213 XMM10, XMM10b, 1214 XMM11, XMM11b, 1215 XMM12, XMM12b, 1216 XMM13, XMM13b, 1217 XMM14, XMM14b, 1218 XMM15, XMM15b, 1219 XMM16, XMM16b, 1220 XMM17, XMM17b, 1221 XMM18, XMM18b, 1222 XMM19, XMM19b, 1223 XMM20, XMM20b, 1224 XMM21, XMM21b, 1225 XMM22, XMM22b, 1226 XMM23, XMM23b, 1227 XMM24, XMM24b, 1228 XMM25, XMM25b, 1229 XMM26, XMM26b, 1230 XMM27, XMM27b, 1231 XMM28, XMM28b, 1232 XMM29, XMM29b, 1233 XMM30, XMM30b, 1234 XMM31, XMM31b 1235 #endif 1236 ); 1237 1238 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} ); 1239 1240 // Class for pre evex 32bit vector registers 1241 reg_class vectors_reg_legacy(XMM0, 1242 XMM1, 1243 XMM2, 1244 XMM3, 1245 XMM4, 1246 XMM5, 1247 XMM6, 1248 XMM7 1249 #ifdef _LP64 1250 ,XMM8, 1251 XMM9, 1252 XMM10, 1253 XMM11, 1254 XMM12, 1255 XMM13, 1256 XMM14, 1257 XMM15 1258 #endif 1259 ); 1260 1261 // Class for evex 32bit vector registers 1262 reg_class vectors_reg_evex(XMM0, 1263 XMM1, 1264 XMM2, 1265 XMM3, 1266 XMM4, 1267 XMM5, 1268 XMM6, 1269 XMM7 1270 #ifdef _LP64 1271 ,XMM8, 1272 XMM9, 1273 XMM10, 1274 XMM11, 1275 XMM12, 1276 XMM13, 1277 XMM14, 1278 XMM15, 1279 XMM16, 1280 XMM17, 1281 XMM18, 1282 XMM19, 1283 XMM20, 1284 XMM21, 1285 XMM22, 1286 XMM23, 1287 XMM24, 1288 XMM25, 1289 XMM26, 1290 XMM27, 1291 XMM28, 1292 XMM29, 1293 XMM30, 1294 XMM31 1295 #endif 1296 ); 1297 1298 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} ); 1299 1300 // Class for all 64bit vector registers 1301 reg_class vectord_reg_legacy(XMM0, XMM0b, 1302 XMM1, XMM1b, 1303 XMM2, XMM2b, 1304 XMM3, XMM3b, 1305 XMM4, XMM4b, 1306 XMM5, XMM5b, 1307 XMM6, XMM6b, 1308 XMM7, XMM7b 1309 #ifdef _LP64 1310 ,XMM8, XMM8b, 1311 XMM9, XMM9b, 1312 XMM10, XMM10b, 1313 XMM11, XMM11b, 1314 XMM12, XMM12b, 1315 XMM13, XMM13b, 1316 
XMM14, XMM14b, 1317 XMM15, XMM15b 1318 #endif 1319 ); 1320 1321 // Class for all 64bit vector registers 1322 reg_class vectord_reg_evex(XMM0, XMM0b, 1323 XMM1, XMM1b, 1324 XMM2, XMM2b, 1325 XMM3, XMM3b, 1326 XMM4, XMM4b, 1327 XMM5, XMM5b, 1328 XMM6, XMM6b, 1329 XMM7, XMM7b 1330 #ifdef _LP64 1331 ,XMM8, XMM8b, 1332 XMM9, XMM9b, 1333 XMM10, XMM10b, 1334 XMM11, XMM11b, 1335 XMM12, XMM12b, 1336 XMM13, XMM13b, 1337 XMM14, XMM14b, 1338 XMM15, XMM15b, 1339 XMM16, XMM16b, 1340 XMM17, XMM17b, 1341 XMM18, XMM18b, 1342 XMM19, XMM19b, 1343 XMM20, XMM20b, 1344 XMM21, XMM21b, 1345 XMM22, XMM22b, 1346 XMM23, XMM23b, 1347 XMM24, XMM24b, 1348 XMM25, XMM25b, 1349 XMM26, XMM26b, 1350 XMM27, XMM27b, 1351 XMM28, XMM28b, 1352 XMM29, XMM29b, 1353 XMM30, XMM30b, 1354 XMM31, XMM31b 1355 #endif 1356 ); 1357 1358 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} ); 1359 1360 // Class for all 128bit vector registers 1361 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, 1362 XMM1, XMM1b, XMM1c, XMM1d, 1363 XMM2, XMM2b, XMM2c, XMM2d, 1364 XMM3, XMM3b, XMM3c, XMM3d, 1365 XMM4, XMM4b, XMM4c, XMM4d, 1366 XMM5, XMM5b, XMM5c, XMM5d, 1367 XMM6, XMM6b, XMM6c, XMM6d, 1368 XMM7, XMM7b, XMM7c, XMM7d 1369 #ifdef _LP64 1370 ,XMM8, XMM8b, XMM8c, XMM8d, 1371 XMM9, XMM9b, XMM9c, XMM9d, 1372 XMM10, XMM10b, XMM10c, XMM10d, 1373 XMM11, XMM11b, XMM11c, XMM11d, 1374 XMM12, XMM12b, XMM12c, XMM12d, 1375 XMM13, XMM13b, XMM13c, XMM13d, 1376 XMM14, XMM14b, XMM14c, XMM14d, 1377 XMM15, XMM15b, XMM15c, XMM15d 1378 #endif 1379 ); 1380 1381 // Class for all 128bit vector registers 1382 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, 1383 XMM1, XMM1b, XMM1c, XMM1d, 1384 XMM2, XMM2b, XMM2c, XMM2d, 1385 XMM3, XMM3b, XMM3c, XMM3d, 1386 XMM4, XMM4b, XMM4c, XMM4d, 1387 XMM5, XMM5b, XMM5c, XMM5d, 1388 XMM6, XMM6b, XMM6c, XMM6d, 1389 XMM7, XMM7b, XMM7c, XMM7d 1390 #ifdef _LP64 1391 ,XMM8, XMM8b, XMM8c, XMM8d, 1392 XMM9, XMM9b, XMM9c, XMM9d, 1393 XMM10, XMM10b, XMM10c, XMM10d, 1394 XMM11, XMM11b, XMM11c, XMM11d, 1395 XMM12, XMM12b, XMM12c, XMM12d, 1396 XMM13, XMM13b, XMM13c, XMM13d, 1397 XMM14, XMM14b, XMM14c, XMM14d, 1398 XMM15, XMM15b, XMM15c, XMM15d, 1399 XMM16, XMM16b, XMM16c, XMM16d, 1400 XMM17, XMM17b, XMM17c, XMM17d, 1401 XMM18, XMM18b, XMM18c, XMM18d, 1402 XMM19, XMM19b, XMM19c, XMM19d, 1403 XMM20, XMM20b, XMM20c, XMM20d, 1404 XMM21, XMM21b, XMM21c, XMM21d, 1405 XMM22, XMM22b, XMM22c, XMM22d, 1406 XMM23, XMM23b, XMM23c, XMM23d, 1407 XMM24, XMM24b, XMM24c, XMM24d, 1408 XMM25, XMM25b, XMM25c, XMM25d, 1409 XMM26, XMM26b, XMM26c, XMM26d, 1410 XMM27, XMM27b, XMM27c, XMM27d, 1411 XMM28, XMM28b, XMM28c, XMM28d, 1412 XMM29, XMM29b, XMM29c, XMM29d, 1413 XMM30, XMM30b, XMM30c, XMM30d, 1414 XMM31, XMM31b, XMM31c, XMM31d 1415 #endif 1416 ); 1417 1418 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} ); 1419 1420 // Class for all 256bit vector registers 1421 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1422 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1423 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1424 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 1425 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1426 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1427 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1428 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1429 #ifdef _LP64 1430 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1431 XMM9, XMM9b, 
XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1432 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1433 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1434 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1435 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1436 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1437 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h 1438 #endif 1439 ); 1440 1441 // Class for all 256bit vector registers 1442 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1443 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1444 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1445 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 1446 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1447 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1448 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1449 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1450 #ifdef _LP64 1451 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1452 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1453 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1454 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1455 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1456 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1457 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1458 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, 1459 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, 1460 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, 1461 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, 1462 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, 1463 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, 1464 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, 1465 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, 1466 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, 1467 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, 1468 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, 1469 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, 1470 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, 1471 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, 1472 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, 1473 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, 1474 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h 1475 #endif 1476 ); 1477 1478 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} ); 1479 1480 // Class for all 512bit vector registers 1481 reg_class vectorz_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1482 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1483 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1484 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1485 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1486 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, 
XMM5o, XMM5p,
                      XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                      XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                     ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                      XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                      XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                      XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                      XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                      XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                      XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                      XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                     ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                      XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                      XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                      XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                      XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                      XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                      XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                      XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                      XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                      XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                      XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                      XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                      XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                      XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                      XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                      XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                      );

%}
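// Editorial note (illustrative only): each reg_class_dynamic above selects
// between its two statically defined classes via the attached predicate.
// For example, the earlier declaration
//
//   reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy,
//                                 %{ VM_Version::supports_evex() %} );
//
// yields the EVEX class (XMM0-XMM31) on AVX-512 capable CPUs and the legacy
// class (XMM0-XMM15) otherwise.  vectorz_reg needs no legacy twin: 512-bit
// vectors exist only with EVEX, so a single version suffices.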
//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description

source_hpp %{
// Header information of the source block.
// Method declarations/definitions which are used outside
// the ad-scope can conveniently be defined here.
//
// To keep related declarations/definitions/uses close together,
// we switch between source %{ }% and source_hpp %{ }% freely as needed.

class NativeJump;

class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};

class HandlerImpl {

 public:

  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  static uint size_exception_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return NativeJump::instruction_size;
  }

#ifdef _LP64
  static uint size_deopt_handler() {
    // three 5 byte instructions
    return 15;
  }
#else
  static uint size_deopt_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // The deopt handler starts out as a jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return 5 + NativeJump::instruction_size; // pushl(); jmp;
  }
#endif
};

%} // end source_hpp

source %{

// Emit exception handler code.
// Stuff framesize into a register and call a VM stub routine.
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();
  __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
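// Editorial note (illustrative sketch, not compiled): the 64-bit deopt
// handler below must push the address of its own first instruction
// ("the_pc") without clobbering any register, and x86-64 has no "push rip".
// The trick is to call the very next instruction, which pushes the return
// address, and then rewind that value in place.  Assuming the call encodes
// to 5 bytes, the emitted sequence works out as
//
//   the_pc:  call next                        ; pushes the_pc + 5
//   next:    sub  qword ptr [rsp], 5          ; imm is (__ offset() - offset),
//                                             ; i.e. bytes emitted since the_pc,
//                                             ; leaving the_pc on the stack
//            jmp  SharedRuntime::deopt_blob()->unpack()
//
// Three 5-byte instructions, matching the size_deopt_handler() == 15 budget
// declared above.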
// Emit deopt handler code.
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();

#ifdef _LP64
  address the_pc = (address) __ pc();
  Label next;
  // push a "the_pc" on the stack without destroying any registers
  // as they all may be live.

  // push address of "next"
  __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
  __ bind(next);
  // adjust it so it matches "the_pc"
  __ subptr(Address(rsp, 0), __ offset() - offset);
#else
  InternalAddress here(__ pc());
  __ pushptr(here.addr());
#endif

  __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
  __ end_a_stub();
  return offset;
}


//=============================================================================

// Float masks come from different places depending on platform.
#ifdef _LP64
static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
#else
static address float_signmask()  { return (address)float_signmask_pool; }
static address float_signflip()  { return (address)float_signflip_pool; }
static address double_signmask() { return (address)double_signmask_pool; }
static address double_signflip() { return (address)double_signflip_pool; }
#endif


const bool Matcher::match_rule_supported(int opcode) {
  if (!has_match_rule(opcode))
    return false;

  switch (opcode) {
    case Op_PopCountI:
    case Op_PopCountL:
      if (!UsePopCountInstruction)
        return false;
      break;
    case Op_MulVI:
      if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
        return false;
      break;
    case Op_MulVL:
    case Op_MulReductionVL:
      if (VM_Version::supports_avx512dq() == false)
        return false;
      // fall through
    case Op_AddReductionVL:
      if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here
        return false;
      // fall through
    case Op_AddReductionVI:
      if (UseSSE < 3) // requires at least SSE3
        return false;
      // fall through
    case Op_MulReductionVI:
      if (UseSSE < 4) // requires at least SSE4
        return false;
      // fall through
    case Op_AddReductionVF:
    case Op_AddReductionVD:
    case Op_MulReductionVF:
    case Op_MulReductionVD:
      if (UseSSE < 1) // requires at least SSE
        return false;
      break;
    case Op_SqrtVD:
      if (UseAVX < 1) // enabled for AVX only
        return false;
      break;
    case Op_CompareAndSwapL:
#ifdef _LP64
    case Op_CompareAndSwapP:
#endif
      if (!VM_Version::supports_cx8())
        return false;
      break;
  }

  return true;  // By default, match rules are supported.
}
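// Worked example (editorial, illustrative only): with -XX:UseSSE=2 and
// -XX:UseAVX=0, Op_MulVI is rejected above (it needs SSE4.1 or AVX), while
// Op_AddReductionVF passes (it needs only SSE), assuming a match rule exists.
// The fall-through chain appears deliberate: an opcode entering at Op_MulVL
// must also satisfy every weaker requirement below it, so e.g. lowering
// -XX:UseAVX below 3 rejects it even on avx512dq-capable hardware.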

// Max vector size in bytes. 0 if not supported.
const int Matcher::vector_width_in_bytes(BasicType bt) {
  assert(is_java_primitive(bt), "only primitive type vectors");
  if (UseSSE < 2) return 0;
  // SSE2 supports 128bit vectors for all types.
  // AVX2 supports 256bit vectors for all types.
  // EVEX (AVX-512) supports 512bit vectors for all types.
  int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
  // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
  if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
    size = (UseAVX > 2) ? 64 : 32;
  // Use flag to limit vector size.
  size = MIN2(size,(int)MaxVectorSize);
  // Minimum 2 values in vector (or 4 for bytes).
  switch (bt) {
  case T_DOUBLE:
  case T_LONG:
    if (size < 16) return 0;
    // fall through
  case T_FLOAT:
  case T_INT:
    if (size < 8) return 0;
    // fall through
  case T_BOOLEAN:
  case T_BYTE:
  case T_CHAR:
  case T_SHORT:
    if (size < 4) return 0;
    break;
  default:
    ShouldNotReachHere();
  }
  return size;
}

// Limits on vector size (number of elements) loaded into vector.
const int Matcher::max_vector_size(const BasicType bt) {
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}
const int Matcher::min_vector_size(const BasicType bt) {
  int max_size = max_vector_size(bt);
  // Min size which can be loaded into vector is 4 bytes.
  int size = (type2aelembytes(bt) == 1) ? 4 : 2;
  return MIN2(size,max_size);
}
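// Worked example (editorial, illustrative only), assuming MaxVectorSize is
// large enough not to clamp:
//   UseAVX == 0, UseSSE >= 2:  size = 16 bytes -> max_vector_size(T_INT) = 4
//   UseAVX == 2:               size = (1 << 2) * 8 = 32 -> 8 ints per vector
//   UseAVX == 3:               size = (1 << 3) * 8 = 64 -> 16 ints per vector
// min_vector_size is 4 elements for byte types (the 4-byte minimum load) and
// 2 elements for everything else.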

// Vector ideal reg corresponding to specified size in bytes
const int Matcher::vector_ideal_reg(int size) {
  assert(MaxVectorSize >= size, "");
  switch(size) {
    case  4: return Op_VecS;
    case  8: return Op_VecD;
    case 16: return Op_VecX;
    case 32: return Op_VecY;
    case 64: return Op_VecZ;
  }
  ShouldNotReachHere();
  return 0;
}

// Only lowest bits of xmm reg are used for vector shift count.
const int Matcher::vector_shift_count_ideal_reg(int size) {
  return Op_VecS;
}

// x86 supports misaligned vector store/load.
const bool Matcher::misaligned_vectors_ok() {
  return !AlignVector; // can be changed by flag
}

// x86 AES instructions are compatible with SunJCE expanded
// keys, hence we do not need to pass the original key to stubs
const bool Matcher::pass_original_key_for_aes() {
  return false;
}

// Helper methods for MachSpillCopyNode::implementation().
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st) {
  // In 64-bit VM size calculation is very complex. Emitting instructions
  // into scratch buffer is used to get size in 64-bit VM.
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
  assert(ireg == Op_VecS || // 32bit vector
         (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
         (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
         "no non-adjacent vector moves" );
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    switch (ireg) {
    case Op_VecS: // copy whole register
    case Op_VecD:
    case Op_VecX:
      __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      break;
    case Op_VecY:
      __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      break;
    case Op_VecZ:
      __ evmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
      break;
    default:
      ShouldNotReachHere();
    }
    int size = __ offset() - offset;
#ifdef ASSERT
    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    assert(!do_size || size == 4, "incorrect size calculation");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    switch (ireg) {
    case Op_VecS:
    case Op_VecD:
    case Op_VecX:
      st->print("movdqu  %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    case Op_VecY:
    case Op_VecZ:
      st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  return (UseAVX > 2) ? 6 : 4;
}
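// Editorial note (illustrative only): the 4- vs 6-byte constants above follow
// from the instruction encodings.  A reg-reg movdqu is a 2-byte prefix (the
// SSE F3 0F escape, or the equally sized 2-byte VEX form) plus opcode plus
// ModRM = 4 bytes, while the 4-byte EVEX prefix of evmovdqu makes 6 bytes.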

static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
                            int stack_offset, int reg, uint ireg, outputStream* st) {
  // In 64-bit VM size calculation is very complex. Emitting instructions
  // into scratch buffer is used to get size in 64-bit VM.
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecD:
        __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecX:
        __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecY:
        __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecZ:
        __ evmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecD:
        __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecX:
        __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecY:
        __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecZ:
        __ evmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
        break;
      default:
        ShouldNotReachHere();
      }
    }
    int size = __ offset() - offset;
#ifdef ASSERT
    // 0: no displacement; 1: disp8 fits below 0x80; otherwise a disp32
    // (6 presumably covers the two extra bytes of the 4-byte EVEX prefix).
    int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4);
    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    assert(!do_size || size == (5+offset_size), "incorrect size calculation");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecD:
        st->print("movq    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecX:
        st->print("movdqu  %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecY:
      case Op_VecZ:
        st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        st->print("movd    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecD:
        st->print("movq    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecX:
        st->print("movdqu  [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecY:
      case Op_VecZ:
        st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      default:
        ShouldNotReachHere();
      }
    }
#endif
  }
  int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4);
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return 5+offset_size;
}

static inline jfloat replicate4_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 32bit.
  assert(width == 1 || width == 2, "only byte or short types here");
  int bit_width = width * 8;
  jint val = con;
  val &= (1 << bit_width) - 1;  // mask off sign bits
  while(bit_width < 32) {
    val |= (val << bit_width);
    bit_width <<= 1;
  }
  jfloat fval = *((jfloat*) &val);  // coerce to float type
  return fval;
}

static inline jdouble replicate8_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
  assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
  int bit_width = width * 8;
  jlong val = con;
  val &= (((jlong) 1) << bit_width) - 1;  // mask off sign bits
  while(bit_width < 64) {
    val |= (val << bit_width);
    bit_width <<= 1;
  }
  jdouble dval = *((jdouble*) &val);  // coerce to double type
  return dval;
}
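// Worked example (editorial, illustrative only):
//   replicate4_imm(0x03, 1)    masks to 0x03, then doubles 8->16->32 bits:
//                              0x0303, 0x03030303, returned as raw float bits.
//   replicate8_imm(0xBEEF, 2)  -> 0xBEEFBEEFBEEFBEEF as raw jdouble bits.
// The returned values are only bit carriers for the constant table; they are
// never used arithmetically as float/double.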

#ifndef PRODUCT
void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("nop \t# %d bytes pad for loops and calls", _count);
}
#endif

void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
  MacroAssembler _masm(&cbuf);
  __ nop(_count);
}

uint MachNopNode::size(PhaseRegAlloc*) const {
  return _count;
}

#ifndef PRODUCT
void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("# breakpoint");
}
#endif

void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const {
  MacroAssembler _masm(&cbuf);
  __ int3();
}

uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
  return MachNode::size(ra_);
}

%}

encode %{

  enc_class call_epilog %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      MacroAssembler _masm(&cbuf);
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
  %}

%}


//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.
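// Editorial note (illustrative only): an operand ties a register class to an
// ideal type, and instructions then take it as a parameter type.  The scalar
// FP instructions below come in predicated pairs, e.g.
//
//   predicate((UseSSE>=1) && (UseAVX == 0));  // two-operand SSE form
//   predicate(UseAVX > 0);                    // three-operand VEX form
//
// so for any given flag settings exactly one variant of each rule can match.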
2024 2025 // This one generically applies only for evex, so only one version 2026 operand vecZ() %{ 2027 constraint(ALLOC_IN_RC(vectorz_reg)); 2028 match(VecZ); 2029 2030 format %{ %} 2031 interface(REG_INTER); 2032 %} 2033 2034 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 2035 2036 // ============================================================================ 2037 2038 instruct ShouldNotReachHere() %{ 2039 match(Halt); 2040 format %{ "int3\t# ShouldNotReachHere" %} 2041 ins_encode %{ 2042 __ int3(); 2043 %} 2044 ins_pipe(pipe_slow); 2045 %} 2046 2047 // ============================================================================ 2048 2049 instruct addF_reg(regF dst, regF src) %{ 2050 predicate((UseSSE>=1) && (UseAVX == 0)); 2051 match(Set dst (AddF dst src)); 2052 2053 format %{ "addss $dst, $src" %} 2054 ins_cost(150); 2055 ins_encode %{ 2056 __ addss($dst$$XMMRegister, $src$$XMMRegister); 2057 %} 2058 ins_pipe(pipe_slow); 2059 %} 2060 2061 instruct addF_mem(regF dst, memory src) %{ 2062 predicate((UseSSE>=1) && (UseAVX == 0)); 2063 match(Set dst (AddF dst (LoadF src))); 2064 2065 format %{ "addss $dst, $src" %} 2066 ins_cost(150); 2067 ins_encode %{ 2068 __ addss($dst$$XMMRegister, $src$$Address); 2069 %} 2070 ins_pipe(pipe_slow); 2071 %} 2072 2073 instruct addF_imm(regF dst, immF con) %{ 2074 predicate((UseSSE>=1) && (UseAVX == 0)); 2075 match(Set dst (AddF dst con)); 2076 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2077 ins_cost(150); 2078 ins_encode %{ 2079 __ addss($dst$$XMMRegister, $constantaddress($con)); 2080 %} 2081 ins_pipe(pipe_slow); 2082 %} 2083 2084 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 2085 predicate(UseAVX > 0); 2086 match(Set dst (AddF src1 src2)); 2087 2088 format %{ "vaddss $dst, $src1, $src2" %} 2089 ins_cost(150); 2090 ins_encode %{ 2091 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2092 %} 2093 ins_pipe(pipe_slow); 2094 %} 2095 2096 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 2097 predicate(UseAVX > 0); 2098 match(Set dst (AddF src1 (LoadF src2))); 2099 2100 format %{ "vaddss $dst, $src1, $src2" %} 2101 ins_cost(150); 2102 ins_encode %{ 2103 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2104 %} 2105 ins_pipe(pipe_slow); 2106 %} 2107 2108 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 2109 predicate(UseAVX > 0); 2110 match(Set dst (AddF src con)); 2111 2112 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2113 ins_cost(150); 2114 ins_encode %{ 2115 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2116 %} 2117 ins_pipe(pipe_slow); 2118 %} 2119 2120 instruct addD_reg(regD dst, regD src) %{ 2121 predicate((UseSSE>=2) && (UseAVX == 0)); 2122 match(Set dst (AddD dst src)); 2123 2124 format %{ "addsd $dst, $src" %} 2125 ins_cost(150); 2126 ins_encode %{ 2127 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 2128 %} 2129 ins_pipe(pipe_slow); 2130 %} 2131 2132 instruct addD_mem(regD dst, memory src) %{ 2133 predicate((UseSSE>=2) && (UseAVX == 0)); 2134 match(Set dst (AddD dst (LoadD src))); 2135 2136 format %{ "addsd $dst, $src" %} 2137 ins_cost(150); 2138 ins_encode %{ 2139 __ addsd($dst$$XMMRegister, $src$$Address); 2140 %} 2141 ins_pipe(pipe_slow); 2142 %} 2143 2144 instruct addD_imm(regD dst, immD con) %{ 2145 predicate((UseSSE>=2) && (UseAVX == 0)); 2146 match(Set dst (AddD dst con)); 2147 format %{ "addsd $dst, [$constantaddress]\t# load from 
constant table: double=$con" %} 2148 ins_cost(150); 2149 ins_encode %{ 2150 __ addsd($dst$$XMMRegister, $constantaddress($con)); 2151 %} 2152 ins_pipe(pipe_slow); 2153 %} 2154 2155 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 2156 predicate(UseAVX > 0); 2157 match(Set dst (AddD src1 src2)); 2158 2159 format %{ "vaddsd $dst, $src1, $src2" %} 2160 ins_cost(150); 2161 ins_encode %{ 2162 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2163 %} 2164 ins_pipe(pipe_slow); 2165 %} 2166 2167 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 2168 predicate(UseAVX > 0); 2169 match(Set dst (AddD src1 (LoadD src2))); 2170 2171 format %{ "vaddsd $dst, $src1, $src2" %} 2172 ins_cost(150); 2173 ins_encode %{ 2174 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2175 %} 2176 ins_pipe(pipe_slow); 2177 %} 2178 2179 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 2180 predicate(UseAVX > 0); 2181 match(Set dst (AddD src con)); 2182 2183 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2184 ins_cost(150); 2185 ins_encode %{ 2186 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2187 %} 2188 ins_pipe(pipe_slow); 2189 %} 2190 2191 instruct subF_reg(regF dst, regF src) %{ 2192 predicate((UseSSE>=1) && (UseAVX == 0)); 2193 match(Set dst (SubF dst src)); 2194 2195 format %{ "subss $dst, $src" %} 2196 ins_cost(150); 2197 ins_encode %{ 2198 __ subss($dst$$XMMRegister, $src$$XMMRegister); 2199 %} 2200 ins_pipe(pipe_slow); 2201 %} 2202 2203 instruct subF_mem(regF dst, memory src) %{ 2204 predicate((UseSSE>=1) && (UseAVX == 0)); 2205 match(Set dst (SubF dst (LoadF src))); 2206 2207 format %{ "subss $dst, $src" %} 2208 ins_cost(150); 2209 ins_encode %{ 2210 __ subss($dst$$XMMRegister, $src$$Address); 2211 %} 2212 ins_pipe(pipe_slow); 2213 %} 2214 2215 instruct subF_imm(regF dst, immF con) %{ 2216 predicate((UseSSE>=1) && (UseAVX == 0)); 2217 match(Set dst (SubF dst con)); 2218 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2219 ins_cost(150); 2220 ins_encode %{ 2221 __ subss($dst$$XMMRegister, $constantaddress($con)); 2222 %} 2223 ins_pipe(pipe_slow); 2224 %} 2225 2226 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 2227 predicate(UseAVX > 0); 2228 match(Set dst (SubF src1 src2)); 2229 2230 format %{ "vsubss $dst, $src1, $src2" %} 2231 ins_cost(150); 2232 ins_encode %{ 2233 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2234 %} 2235 ins_pipe(pipe_slow); 2236 %} 2237 2238 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 2239 predicate(UseAVX > 0); 2240 match(Set dst (SubF src1 (LoadF src2))); 2241 2242 format %{ "vsubss $dst, $src1, $src2" %} 2243 ins_cost(150); 2244 ins_encode %{ 2245 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2246 %} 2247 ins_pipe(pipe_slow); 2248 %} 2249 2250 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 2251 predicate(UseAVX > 0); 2252 match(Set dst (SubF src con)); 2253 2254 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2255 ins_cost(150); 2256 ins_encode %{ 2257 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2258 %} 2259 ins_pipe(pipe_slow); 2260 %} 2261 2262 instruct subD_reg(regD dst, regD src) %{ 2263 predicate((UseSSE>=2) && (UseAVX == 0)); 2264 match(Set dst (SubD dst src)); 2265 2266 format %{ "subsd $dst, $src" %} 2267 ins_cost(150); 2268 ins_encode %{ 2269 __ 
subsd($dst$$XMMRegister, $src$$XMMRegister); 2270 %} 2271 ins_pipe(pipe_slow); 2272 %} 2273 2274 instruct subD_mem(regD dst, memory src) %{ 2275 predicate((UseSSE>=2) && (UseAVX == 0)); 2276 match(Set dst (SubD dst (LoadD src))); 2277 2278 format %{ "subsd $dst, $src" %} 2279 ins_cost(150); 2280 ins_encode %{ 2281 __ subsd($dst$$XMMRegister, $src$$Address); 2282 %} 2283 ins_pipe(pipe_slow); 2284 %} 2285 2286 instruct subD_imm(regD dst, immD con) %{ 2287 predicate((UseSSE>=2) && (UseAVX == 0)); 2288 match(Set dst (SubD dst con)); 2289 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2290 ins_cost(150); 2291 ins_encode %{ 2292 __ subsd($dst$$XMMRegister, $constantaddress($con)); 2293 %} 2294 ins_pipe(pipe_slow); 2295 %} 2296 2297 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 2298 predicate(UseAVX > 0); 2299 match(Set dst (SubD src1 src2)); 2300 2301 format %{ "vsubsd $dst, $src1, $src2" %} 2302 ins_cost(150); 2303 ins_encode %{ 2304 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2305 %} 2306 ins_pipe(pipe_slow); 2307 %} 2308 2309 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 2310 predicate(UseAVX > 0); 2311 match(Set dst (SubD src1 (LoadD src2))); 2312 2313 format %{ "vsubsd $dst, $src1, $src2" %} 2314 ins_cost(150); 2315 ins_encode %{ 2316 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2317 %} 2318 ins_pipe(pipe_slow); 2319 %} 2320 2321 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 2322 predicate(UseAVX > 0); 2323 match(Set dst (SubD src con)); 2324 2325 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2326 ins_cost(150); 2327 ins_encode %{ 2328 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2329 %} 2330 ins_pipe(pipe_slow); 2331 %} 2332 2333 instruct mulF_reg(regF dst, regF src) %{ 2334 predicate((UseSSE>=1) && (UseAVX == 0)); 2335 match(Set dst (MulF dst src)); 2336 2337 format %{ "mulss $dst, $src" %} 2338 ins_cost(150); 2339 ins_encode %{ 2340 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 2341 %} 2342 ins_pipe(pipe_slow); 2343 %} 2344 2345 instruct mulF_mem(regF dst, memory src) %{ 2346 predicate((UseSSE>=1) && (UseAVX == 0)); 2347 match(Set dst (MulF dst (LoadF src))); 2348 2349 format %{ "mulss $dst, $src" %} 2350 ins_cost(150); 2351 ins_encode %{ 2352 __ mulss($dst$$XMMRegister, $src$$Address); 2353 %} 2354 ins_pipe(pipe_slow); 2355 %} 2356 2357 instruct mulF_imm(regF dst, immF con) %{ 2358 predicate((UseSSE>=1) && (UseAVX == 0)); 2359 match(Set dst (MulF dst con)); 2360 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2361 ins_cost(150); 2362 ins_encode %{ 2363 __ mulss($dst$$XMMRegister, $constantaddress($con)); 2364 %} 2365 ins_pipe(pipe_slow); 2366 %} 2367 2368 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 2369 predicate(UseAVX > 0); 2370 match(Set dst (MulF src1 src2)); 2371 2372 format %{ "vmulss $dst, $src1, $src2" %} 2373 ins_cost(150); 2374 ins_encode %{ 2375 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2376 %} 2377 ins_pipe(pipe_slow); 2378 %} 2379 2380 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 2381 predicate(UseAVX > 0); 2382 match(Set dst (MulF src1 (LoadF src2))); 2383 2384 format %{ "vmulss $dst, $src1, $src2" %} 2385 ins_cost(150); 2386 ins_encode %{ 2387 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2388 %} 2389 ins_pipe(pipe_slow); 2390 %} 2391 2392 instruct mulF_reg_imm(regF 
dst, regF src, immF con) %{ 2393 predicate(UseAVX > 0); 2394 match(Set dst (MulF src con)); 2395 2396 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2397 ins_cost(150); 2398 ins_encode %{ 2399 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2400 %} 2401 ins_pipe(pipe_slow); 2402 %} 2403 2404 instruct mulD_reg(regD dst, regD src) %{ 2405 predicate((UseSSE>=2) && (UseAVX == 0)); 2406 match(Set dst (MulD dst src)); 2407 2408 format %{ "mulsd $dst, $src" %} 2409 ins_cost(150); 2410 ins_encode %{ 2411 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 2412 %} 2413 ins_pipe(pipe_slow); 2414 %} 2415 2416 instruct mulD_mem(regD dst, memory src) %{ 2417 predicate((UseSSE>=2) && (UseAVX == 0)); 2418 match(Set dst (MulD dst (LoadD src))); 2419 2420 format %{ "mulsd $dst, $src" %} 2421 ins_cost(150); 2422 ins_encode %{ 2423 __ mulsd($dst$$XMMRegister, $src$$Address); 2424 %} 2425 ins_pipe(pipe_slow); 2426 %} 2427 2428 instruct mulD_imm(regD dst, immD con) %{ 2429 predicate((UseSSE>=2) && (UseAVX == 0)); 2430 match(Set dst (MulD dst con)); 2431 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2432 ins_cost(150); 2433 ins_encode %{ 2434 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 2435 %} 2436 ins_pipe(pipe_slow); 2437 %} 2438 2439 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 2440 predicate(UseAVX > 0); 2441 match(Set dst (MulD src1 src2)); 2442 2443 format %{ "vmulsd $dst, $src1, $src2" %} 2444 ins_cost(150); 2445 ins_encode %{ 2446 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2447 %} 2448 ins_pipe(pipe_slow); 2449 %} 2450 2451 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 2452 predicate(UseAVX > 0); 2453 match(Set dst (MulD src1 (LoadD src2))); 2454 2455 format %{ "vmulsd $dst, $src1, $src2" %} 2456 ins_cost(150); 2457 ins_encode %{ 2458 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2459 %} 2460 ins_pipe(pipe_slow); 2461 %} 2462 2463 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 2464 predicate(UseAVX > 0); 2465 match(Set dst (MulD src con)); 2466 2467 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2468 ins_cost(150); 2469 ins_encode %{ 2470 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2471 %} 2472 ins_pipe(pipe_slow); 2473 %} 2474 2475 instruct divF_reg(regF dst, regF src) %{ 2476 predicate((UseSSE>=1) && (UseAVX == 0)); 2477 match(Set dst (DivF dst src)); 2478 2479 format %{ "divss $dst, $src" %} 2480 ins_cost(150); 2481 ins_encode %{ 2482 __ divss($dst$$XMMRegister, $src$$XMMRegister); 2483 %} 2484 ins_pipe(pipe_slow); 2485 %} 2486 2487 instruct divF_mem(regF dst, memory src) %{ 2488 predicate((UseSSE>=1) && (UseAVX == 0)); 2489 match(Set dst (DivF dst (LoadF src))); 2490 2491 format %{ "divss $dst, $src" %} 2492 ins_cost(150); 2493 ins_encode %{ 2494 __ divss($dst$$XMMRegister, $src$$Address); 2495 %} 2496 ins_pipe(pipe_slow); 2497 %} 2498 2499 instruct divF_imm(regF dst, immF con) %{ 2500 predicate((UseSSE>=1) && (UseAVX == 0)); 2501 match(Set dst (DivF dst con)); 2502 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2503 ins_cost(150); 2504 ins_encode %{ 2505 __ divss($dst$$XMMRegister, $constantaddress($con)); 2506 %} 2507 ins_pipe(pipe_slow); 2508 %} 2509 2510 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 2511 predicate(UseAVX > 0); 2512 match(Set dst (DivF src1 src2)); 2513 2514 
format %{ "vdivss $dst, $src1, $src2" %} 2515 ins_cost(150); 2516 ins_encode %{ 2517 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2518 %} 2519 ins_pipe(pipe_slow); 2520 %} 2521 2522 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 2523 predicate(UseAVX > 0); 2524 match(Set dst (DivF src1 (LoadF src2))); 2525 2526 format %{ "vdivss $dst, $src1, $src2" %} 2527 ins_cost(150); 2528 ins_encode %{ 2529 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2530 %} 2531 ins_pipe(pipe_slow); 2532 %} 2533 2534 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 2535 predicate(UseAVX > 0); 2536 match(Set dst (DivF src con)); 2537 2538 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2539 ins_cost(150); 2540 ins_encode %{ 2541 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2542 %} 2543 ins_pipe(pipe_slow); 2544 %} 2545 2546 instruct divD_reg(regD dst, regD src) %{ 2547 predicate((UseSSE>=2) && (UseAVX == 0)); 2548 match(Set dst (DivD dst src)); 2549 2550 format %{ "divsd $dst, $src" %} 2551 ins_cost(150); 2552 ins_encode %{ 2553 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 2554 %} 2555 ins_pipe(pipe_slow); 2556 %} 2557 2558 instruct divD_mem(regD dst, memory src) %{ 2559 predicate((UseSSE>=2) && (UseAVX == 0)); 2560 match(Set dst (DivD dst (LoadD src))); 2561 2562 format %{ "divsd $dst, $src" %} 2563 ins_cost(150); 2564 ins_encode %{ 2565 __ divsd($dst$$XMMRegister, $src$$Address); 2566 %} 2567 ins_pipe(pipe_slow); 2568 %} 2569 2570 instruct divD_imm(regD dst, immD con) %{ 2571 predicate((UseSSE>=2) && (UseAVX == 0)); 2572 match(Set dst (DivD dst con)); 2573 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2574 ins_cost(150); 2575 ins_encode %{ 2576 __ divsd($dst$$XMMRegister, $constantaddress($con)); 2577 %} 2578 ins_pipe(pipe_slow); 2579 %} 2580 2581 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 2582 predicate(UseAVX > 0); 2583 match(Set dst (DivD src1 src2)); 2584 2585 format %{ "vdivsd $dst, $src1, $src2" %} 2586 ins_cost(150); 2587 ins_encode %{ 2588 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2589 %} 2590 ins_pipe(pipe_slow); 2591 %} 2592 2593 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 2594 predicate(UseAVX > 0); 2595 match(Set dst (DivD src1 (LoadD src2))); 2596 2597 format %{ "vdivsd $dst, $src1, $src2" %} 2598 ins_cost(150); 2599 ins_encode %{ 2600 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2601 %} 2602 ins_pipe(pipe_slow); 2603 %} 2604 2605 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 2606 predicate(UseAVX > 0); 2607 match(Set dst (DivD src con)); 2608 2609 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2610 ins_cost(150); 2611 ins_encode %{ 2612 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2613 %} 2614 ins_pipe(pipe_slow); 2615 %} 2616 2617 instruct absF_reg(regF dst) %{ 2618 predicate((UseSSE>=1) && (UseAVX == 0)); 2619 match(Set dst (AbsF dst)); 2620 ins_cost(150); 2621 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} 2622 ins_encode %{ 2623 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 2624 %} 2625 ins_pipe(pipe_slow); 2626 %} 2627 2628 instruct absF_reg_reg(regF dst, regF src) %{ 2629 predicate(UseAVX > 0); 2630 match(Set dst (AbsF src)); 2631 ins_cost(150); 2632 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign 
masking" %} 2633 ins_encode %{ 2634 int vector_len = 0; 2635 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 2636 ExternalAddress(float_signmask()), vector_len); 2637 %} 2638 ins_pipe(pipe_slow); 2639 %} 2640 2641 instruct absD_reg(regD dst) %{ 2642 predicate((UseSSE>=2) && (UseAVX == 0)); 2643 match(Set dst (AbsD dst)); 2644 ins_cost(150); 2645 format %{ "andpd $dst, [0x7fffffffffffffff]\t" 2646 "# abs double by sign masking" %} 2647 ins_encode %{ 2648 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); 2649 %} 2650 ins_pipe(pipe_slow); 2651 %} 2652 2653 instruct absD_reg_reg(regD dst, regD src) %{ 2654 predicate(UseAVX > 0); 2655 match(Set dst (AbsD src)); 2656 ins_cost(150); 2657 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 2658 "# abs double by sign masking" %} 2659 ins_encode %{ 2660 int vector_len = 0; 2661 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 2662 ExternalAddress(double_signmask()), vector_len); 2663 %} 2664 ins_pipe(pipe_slow); 2665 %} 2666 2667 instruct negF_reg(regF dst) %{ 2668 predicate((UseSSE>=1) && (UseAVX == 0)); 2669 match(Set dst (NegF dst)); 2670 ins_cost(150); 2671 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 2672 ins_encode %{ 2673 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 2674 %} 2675 ins_pipe(pipe_slow); 2676 %} 2677 2678 instruct negF_reg_reg(regF dst, regF src) %{ 2679 predicate(UseAVX > 0); 2680 match(Set dst (NegF src)); 2681 ins_cost(150); 2682 format %{ "vxorps $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 2683 ins_encode %{ 2684 int vector_len = 0; 2685 __ vxorps($dst$$XMMRegister, $src$$XMMRegister, 2686 ExternalAddress(float_signflip()), vector_len); 2687 %} 2688 ins_pipe(pipe_slow); 2689 %} 2690 2691 instruct negD_reg(regD dst) %{ 2692 predicate((UseSSE>=2) && (UseAVX == 0)); 2693 match(Set dst (NegD dst)); 2694 ins_cost(150); 2695 format %{ "xorpd $dst, [0x8000000000000000]\t" 2696 "# neg double by sign flipping" %} 2697 ins_encode %{ 2698 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 2699 %} 2700 ins_pipe(pipe_slow); 2701 %} 2702 2703 instruct negD_reg_reg(regD dst, regD src) %{ 2704 predicate(UseAVX > 0); 2705 match(Set dst (NegD src)); 2706 ins_cost(150); 2707 format %{ "vxorpd $dst, $src, [0x8000000000000000]\t" 2708 "# neg double by sign flipping" %} 2709 ins_encode %{ 2710 int vector_len = 0; 2711 __ vxorpd($dst$$XMMRegister, $src$$XMMRegister, 2712 ExternalAddress(double_signflip()), vector_len); 2713 %} 2714 ins_pipe(pipe_slow); 2715 %} 2716 2717 instruct sqrtF_reg(regF dst, regF src) %{ 2718 predicate(UseSSE>=1); 2719 match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); 2720 2721 format %{ "sqrtss $dst, $src" %} 2722 ins_cost(150); 2723 ins_encode %{ 2724 __ sqrtss($dst$$XMMRegister, $src$$XMMRegister); 2725 %} 2726 ins_pipe(pipe_slow); 2727 %} 2728 2729 instruct sqrtF_mem(regF dst, memory src) %{ 2730 predicate(UseSSE>=1); 2731 match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src))))); 2732 2733 format %{ "sqrtss $dst, $src" %} 2734 ins_cost(150); 2735 ins_encode %{ 2736 __ sqrtss($dst$$XMMRegister, $src$$Address); 2737 %} 2738 ins_pipe(pipe_slow); 2739 %} 2740 2741 instruct sqrtF_imm(regF dst, immF con) %{ 2742 predicate(UseSSE>=1); 2743 match(Set dst (ConvD2F (SqrtD (ConvF2D con)))); 2744 format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2745 ins_cost(150); 2746 ins_encode %{ 2747 __ sqrtss($dst$$XMMRegister, $constantaddress($con)); 2748 %} 2749 ins_pipe(pipe_slow); 2750 %} 2751 2752 instruct 
instruct sqrtF_reg(regF dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_mem(regF dst, memory src) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_imm(regF dst, immF con) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D con))));
  format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_reg(regD dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD src));

  format %{ "sqrtsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_mem(regD dst, memory src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD (LoadD src)));

  format %{ "sqrtsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_imm(regD dst, immD con) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD con));
  format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// ====================VECTOR INSTRUCTIONS=====================================

// Load vectors (4 bytes long)
instruct loadV4(vecS dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movd $dst,$mem\t! load vector (4 bytes)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (8 bytes long)
instruct loadV8(vecD dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movq $dst,$mem\t! load vector (8 bytes)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (16 bytes long)
instruct loadV16(vecX dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %}
  ins_encode %{
    __ movdqu($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (32 bytes long)
instruct loadV32(vecY dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 32);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (64 bytes long)
instruct loadV64(vecZ dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 64);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdqu $dst k0,$mem\t! load vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdqu($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
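// In the EVEX-encoded operations (evmovdqu above, the evpbroadcast*
// forms further down), vector_len selects the operand width:
// 0 = 128-bit, 1 = 256-bit, 2 = 512-bit.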
// Store vectors
instruct storeV4(memory mem, vecS src) %{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movd $mem,$src\t! store vector (4 bytes)" %}
  ins_encode %{
    __ movdl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV8(memory mem, vecD src) %{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movq $mem,$src\t! store vector (8 bytes)" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV16(memory mem, vecX src) %{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %}
  ins_encode %{
    __ movdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV32(memory mem, vecY src) %{
  predicate(n->as_StoreVector()->memory_size() == 32);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV64(memory mem, vecZ src) %{
  predicate(n->as_StoreVector()->memory_size() == 64);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdqu $mem k0,$src\t! store vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdqu($mem$$Address, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
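// The legacy replicate forms below are predicated on the *absence* of
// the AVX-512 VL/BW features, so they never compete with the EVEX
// vpbroadcast forms in the EVEX REPLICATE section further down.
// Illustrative example (not from this file): a fill loop such as
//   for (int i = 0; i < a.length; i++) a[i] = b;   // byte[] a, byte b
// is superword-vectorized into ReplicateB feeding StoreVector, which
// these rules then implement.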
replicate16B" %} 2955 ins_encode %{ 2956 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 2957 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2958 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2959 %} 2960 ins_pipe( pipe_slow ); 2961 %} 2962 2963 instruct Repl32B(vecY dst, rRegI src) %{ 2964 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 2965 match(Set dst (ReplicateB src)); 2966 format %{ "movd $dst,$src\n\t" 2967 "punpcklbw $dst,$dst\n\t" 2968 "pshuflw $dst,$dst,0x00\n\t" 2969 "punpcklqdq $dst,$dst\n\t" 2970 "vinserti128h $dst,$dst,$dst\t! replicate32B" %} 2971 ins_encode %{ 2972 __ movdl($dst$$XMMRegister, $src$$Register); 2973 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 2974 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2975 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2976 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2977 %} 2978 ins_pipe( pipe_slow ); 2979 %} 2980 2981 instruct Repl32B_mem(vecY dst, memory mem) %{ 2982 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 2983 match(Set dst (ReplicateB (LoadB mem))); 2984 format %{ "punpcklbw $dst,$mem\n\t" 2985 "pshuflw $dst,$dst,0x00\n\t" 2986 "punpcklqdq $dst,$dst\n\t" 2987 "vinserti128h $dst,$dst,$dst\t! replicate32B" %} 2988 ins_encode %{ 2989 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 2990 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2991 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2992 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2993 %} 2994 ins_pipe( pipe_slow ); 2995 %} 2996 2997 instruct Repl16B_imm(vecX dst, immI con) %{ 2998 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 2999 match(Set dst (ReplicateB con)); 3000 format %{ "movq $dst,[$constantaddress]\n\t" 3001 "punpcklqdq $dst,$dst\t! replicate16B($con)" %} 3002 ins_encode %{ 3003 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3004 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3005 %} 3006 ins_pipe( pipe_slow ); 3007 %} 3008 3009 instruct Repl32B_imm(vecY dst, immI con) %{ 3010 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3011 match(Set dst (ReplicateB con)); 3012 format %{ "movq $dst,[$constantaddress]\n\t" 3013 "punpcklqdq $dst,$dst\n\t" 3014 "vinserti128h $dst,$dst,$dst\t! lreplicate32B($con)" %} 3015 ins_encode %{ 3016 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3017 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3018 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3019 %} 3020 ins_pipe( pipe_slow ); 3021 %} 3022 3023 instruct Repl4S(vecD dst, rRegI src) %{ 3024 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vlbw()); 3025 match(Set dst (ReplicateS src)); 3026 format %{ "movd $dst,$src\n\t" 3027 "pshuflw $dst,$dst,0x00\t! replicate4S" %} 3028 ins_encode %{ 3029 __ movdl($dst$$XMMRegister, $src$$Register); 3030 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3031 %} 3032 ins_pipe( pipe_slow ); 3033 %} 3034 3035 instruct Repl4S_mem(vecD dst, memory mem) %{ 3036 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3037 match(Set dst (ReplicateS (LoadS mem))); 3038 format %{ "pshuflw $dst,$mem,0x00\t! 
replicate4S" %} 3039 ins_encode %{ 3040 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3041 %} 3042 ins_pipe( pipe_slow ); 3043 %} 3044 3045 instruct Repl8S(vecX dst, rRegI src) %{ 3046 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw()); 3047 match(Set dst (ReplicateS src)); 3048 format %{ "movd $dst,$src\n\t" 3049 "pshuflw $dst,$dst,0x00\n\t" 3050 "punpcklqdq $dst,$dst\t! replicate8S" %} 3051 ins_encode %{ 3052 __ movdl($dst$$XMMRegister, $src$$Register); 3053 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3054 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3055 %} 3056 ins_pipe( pipe_slow ); 3057 %} 3058 3059 instruct Repl8S_mem(vecX dst, memory mem) %{ 3060 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3061 match(Set dst (ReplicateS (LoadS mem))); 3062 format %{ "pshuflw $dst,$mem,0x00\n\t" 3063 "punpcklqdq $dst,$dst\t! replicate8S" %} 3064 ins_encode %{ 3065 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3066 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3067 %} 3068 ins_pipe( pipe_slow ); 3069 %} 3070 3071 instruct Repl8S_imm(vecX dst, immI con) %{ 3072 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw()); 3073 match(Set dst (ReplicateS con)); 3074 format %{ "movq $dst,[$constantaddress]\n\t" 3075 "punpcklqdq $dst,$dst\t! replicate8S($con)" %} 3076 ins_encode %{ 3077 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3078 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3079 %} 3080 ins_pipe( pipe_slow ); 3081 %} 3082 3083 instruct Repl16S(vecY dst, rRegI src) %{ 3084 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3085 match(Set dst (ReplicateS src)); 3086 format %{ "movd $dst,$src\n\t" 3087 "pshuflw $dst,$dst,0x00\n\t" 3088 "punpcklqdq $dst,$dst\n\t" 3089 "vinserti128h $dst,$dst,$dst\t! replicate16S" %} 3090 ins_encode %{ 3091 __ movdl($dst$$XMMRegister, $src$$Register); 3092 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3093 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3094 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3095 %} 3096 ins_pipe( pipe_slow ); 3097 %} 3098 3099 instruct Repl16S_mem(vecY dst, memory mem) %{ 3100 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3101 match(Set dst (ReplicateS (LoadS mem))); 3102 format %{ "pshuflw $dst,$mem,0x00\n\t" 3103 "punpcklqdq $dst,$dst\n\t" 3104 "vinserti128h $dst,$dst,$dst\t! replicate16S" %} 3105 ins_encode %{ 3106 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3107 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3108 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3109 %} 3110 ins_pipe( pipe_slow ); 3111 %} 3112 3113 instruct Repl16S_imm(vecY dst, immI con) %{ 3114 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3115 match(Set dst (ReplicateS con)); 3116 format %{ "movq $dst,[$constantaddress]\n\t" 3117 "punpcklqdq $dst,$dst\n\t" 3118 "vinserti128h $dst,$dst,$dst\t! 
instruct Repl4I(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate4I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "pshufd $dst,$mem,0x00\t! replicate4I" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "pshufd $dst,$mem,0x00\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t"
            "punpcklqdq $dst,$dst" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
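// The *_imm forms stage the constant through the constant table: the
// replicate8_imm/replicate4_imm helpers (defined elsewhere in this
// file) appear to tile the constant across a 64-/32-bit pattern at the
// given element width in bytes (1 = byte, 2 = short, 4 = int), which
// movq/movdl then load before the pattern is widened to vector size.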
replicate2L" %} 3207 ins_encode %{ 3208 __ movq($dst$$XMMRegister, $mem$$Address); 3209 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3210 %} 3211 ins_pipe( pipe_slow ); 3212 %} 3213 3214 // Replicate long (8 byte) scalar to be vector 3215 #ifdef _LP64 3216 instruct Repl4L(vecY dst, rRegL src) %{ 3217 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3218 match(Set dst (ReplicateL src)); 3219 format %{ "movdq $dst,$src\n\t" 3220 "punpcklqdq $dst,$dst\n\t" 3221 "vinserti128h $dst,$dst,$dst\t! replicate4L" %} 3222 ins_encode %{ 3223 __ movdq($dst$$XMMRegister, $src$$Register); 3224 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3225 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3226 %} 3227 ins_pipe( pipe_slow ); 3228 %} 3229 #else // _LP64 3230 instruct Repl4L(vecY dst, eRegL src, regD tmp) %{ 3231 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3232 match(Set dst (ReplicateL src)); 3233 effect(TEMP dst, USE src, TEMP tmp); 3234 format %{ "movdl $dst,$src.lo\n\t" 3235 "movdl $tmp,$src.hi\n\t" 3236 "punpckldq $dst,$tmp\n\t" 3237 "punpcklqdq $dst,$dst\n\t" 3238 "vinserti128h $dst,$dst,$dst\t! replicate4L" %} 3239 ins_encode %{ 3240 __ movdl($dst$$XMMRegister, $src$$Register); 3241 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3242 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3243 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3244 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3245 %} 3246 ins_pipe( pipe_slow ); 3247 %} 3248 #endif // _LP64 3249 3250 instruct Repl4L_imm(vecY dst, immL con) %{ 3251 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3252 match(Set dst (ReplicateL con)); 3253 format %{ "movq $dst,[$constantaddress]\n\t" 3254 "punpcklqdq $dst,$dst\n\t" 3255 "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %} 3256 ins_encode %{ 3257 __ movq($dst$$XMMRegister, $constantaddress($con)); 3258 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3259 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3260 %} 3261 ins_pipe( pipe_slow ); 3262 %} 3263 3264 instruct Repl4L_mem(vecY dst, memory mem) %{ 3265 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3266 match(Set dst (ReplicateL (LoadL mem))); 3267 format %{ "movq $dst,$mem\n\t" 3268 "punpcklqdq $dst,$dst\n\t" 3269 "vinserti128h $dst,$dst,$dst\t! replicate4L" %} 3270 ins_encode %{ 3271 __ movq($dst$$XMMRegister, $mem$$Address); 3272 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3273 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3274 %} 3275 ins_pipe( pipe_slow ); 3276 %} 3277 3278 instruct Repl2F_mem(vecD dst, memory mem) %{ 3279 predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3280 match(Set dst (ReplicateF (LoadF mem))); 3281 format %{ "pshufd $dst,$mem,0x00\t! replicate2F" %} 3282 ins_encode %{ 3283 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3284 %} 3285 ins_pipe( pipe_slow ); 3286 %} 3287 3288 instruct Repl4F_mem(vecX dst, memory mem) %{ 3289 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3290 match(Set dst (ReplicateF (LoadF mem))); 3291 format %{ "pshufd $dst,$mem,0x00\t! 
replicate4F" %} 3292 ins_encode %{ 3293 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3294 %} 3295 ins_pipe( pipe_slow ); 3296 %} 3297 3298 instruct Repl8F(vecY dst, regF src) %{ 3299 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3300 match(Set dst (ReplicateF src)); 3301 format %{ "pshufd $dst,$src,0x00\n\t" 3302 "vinsertf128h $dst,$dst,$dst\t! replicate8F" %} 3303 ins_encode %{ 3304 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3305 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3306 %} 3307 ins_pipe( pipe_slow ); 3308 %} 3309 3310 instruct Repl8F_mem(vecY dst, memory mem) %{ 3311 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3312 match(Set dst (ReplicateF (LoadF mem))); 3313 format %{ "pshufd $dst,$mem,0x00\n\t" 3314 "vinsertf128h $dst,$dst,$dst\t! replicate8F" %} 3315 ins_encode %{ 3316 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3317 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3318 %} 3319 ins_pipe( pipe_slow ); 3320 %} 3321 3322 instruct Repl2D_mem(vecX dst, memory mem) %{ 3323 predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3324 match(Set dst (ReplicateD (LoadD mem))); 3325 format %{ "pshufd $dst,$mem,0x44\t! replicate2D" %} 3326 ins_encode %{ 3327 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3328 %} 3329 ins_pipe( pipe_slow ); 3330 %} 3331 3332 instruct Repl4D(vecY dst, regD src) %{ 3333 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3334 match(Set dst (ReplicateD src)); 3335 format %{ "pshufd $dst,$src,0x44\n\t" 3336 "vinsertf128h $dst,$dst,$dst\t! replicate4D" %} 3337 ins_encode %{ 3338 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3339 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3340 %} 3341 ins_pipe( pipe_slow ); 3342 %} 3343 3344 instruct Repl4D_mem(vecY dst, memory mem) %{ 3345 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3346 match(Set dst (ReplicateD (LoadD mem))); 3347 format %{ "pshufd $dst,$mem,0x44\n\t" 3348 "vinsertf128h $dst,$dst,$dst\t! replicate4D" %} 3349 ins_encode %{ 3350 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3351 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3352 %} 3353 ins_pipe( pipe_slow ); 3354 %} 3355 3356 // ====================GENERIC REPLICATE========================================== 3357 3358 // Replicate byte scalar to be vector 3359 instruct Repl4B(vecS dst, rRegI src) %{ 3360 predicate(n->as_Vector()->length() == 4); 3361 match(Set dst (ReplicateB src)); 3362 format %{ "movd $dst,$src\n\t" 3363 "punpcklbw $dst,$dst\n\t" 3364 "pshuflw $dst,$dst,0x00\t! replicate4B" %} 3365 ins_encode %{ 3366 __ movdl($dst$$XMMRegister, $src$$Register); 3367 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3368 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3369 %} 3370 ins_pipe( pipe_slow ); 3371 %} 3372 3373 instruct Repl8B(vecD dst, rRegI src) %{ 3374 predicate(n->as_Vector()->length() == 8); 3375 match(Set dst (ReplicateB src)); 3376 format %{ "movd $dst,$src\n\t" 3377 "punpcklbw $dst,$dst\n\t" 3378 "pshuflw $dst,$dst,0x00\t! 
replicate8B" %} 3379 ins_encode %{ 3380 __ movdl($dst$$XMMRegister, $src$$Register); 3381 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3382 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3383 %} 3384 ins_pipe( pipe_slow ); 3385 %} 3386 3387 // Replicate byte scalar immediate to be vector by loading from const table. 3388 instruct Repl4B_imm(vecS dst, immI con) %{ 3389 predicate(n->as_Vector()->length() == 4); 3390 match(Set dst (ReplicateB con)); 3391 format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %} 3392 ins_encode %{ 3393 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1))); 3394 %} 3395 ins_pipe( pipe_slow ); 3396 %} 3397 3398 instruct Repl8B_imm(vecD dst, immI con) %{ 3399 predicate(n->as_Vector()->length() == 8); 3400 match(Set dst (ReplicateB con)); 3401 format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %} 3402 ins_encode %{ 3403 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3404 %} 3405 ins_pipe( pipe_slow ); 3406 %} 3407 3408 // Replicate byte scalar zero to be vector 3409 instruct Repl4B_zero(vecS dst, immI0 zero) %{ 3410 predicate(n->as_Vector()->length() == 4); 3411 match(Set dst (ReplicateB zero)); 3412 format %{ "pxor $dst,$dst\t! replicate4B zero" %} 3413 ins_encode %{ 3414 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3415 %} 3416 ins_pipe( fpu_reg_reg ); 3417 %} 3418 3419 instruct Repl8B_zero(vecD dst, immI0 zero) %{ 3420 predicate(n->as_Vector()->length() == 8); 3421 match(Set dst (ReplicateB zero)); 3422 format %{ "pxor $dst,$dst\t! replicate8B zero" %} 3423 ins_encode %{ 3424 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3425 %} 3426 ins_pipe( fpu_reg_reg ); 3427 %} 3428 3429 instruct Repl16B_zero(vecX dst, immI0 zero) %{ 3430 predicate(n->as_Vector()->length() == 16); 3431 match(Set dst (ReplicateB zero)); 3432 format %{ "pxor $dst,$dst\t! replicate16B zero" %} 3433 ins_encode %{ 3434 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3435 %} 3436 ins_pipe( fpu_reg_reg ); 3437 %} 3438 3439 instruct Repl32B_zero(vecY dst, immI0 zero) %{ 3440 predicate(n->as_Vector()->length() == 32); 3441 match(Set dst (ReplicateB zero)); 3442 format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %} 3443 ins_encode %{ 3444 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 3445 int vector_len = 1; 3446 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3447 %} 3448 ins_pipe( fpu_reg_reg ); 3449 %} 3450 3451 // Replicate char/short (2 byte) scalar to be vector 3452 instruct Repl2S(vecS dst, rRegI src) %{ 3453 predicate(n->as_Vector()->length() == 2); 3454 match(Set dst (ReplicateS src)); 3455 format %{ "movd $dst,$src\n\t" 3456 "pshuflw $dst,$dst,0x00\t! replicate2S" %} 3457 ins_encode %{ 3458 __ movdl($dst$$XMMRegister, $src$$Register); 3459 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3460 %} 3461 ins_pipe( fpu_reg_reg ); 3462 %} 3463 3464 // Replicate char/short (2 byte) scalar immediate to be vector by loading from const table. 3465 instruct Repl2S_imm(vecS dst, immI con) %{ 3466 predicate(n->as_Vector()->length() == 2); 3467 match(Set dst (ReplicateS con)); 3468 format %{ "movdl $dst,[$constantaddress]\t! 
// Replicate byte scalar zero to be vector
instruct Repl4B_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate4B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8B_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate8B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16B_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate16B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl32B_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate char/short (2 byte) scalar to be vector
instruct Repl2S(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate2S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
instruct Repl2S_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS con));
  format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate char/short (2 byte) scalar zero to be vector
instruct Repl2S_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate2S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate4S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8S_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate8S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16S_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar to be vector
instruct Repl2I(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Integer could be loaded into xmm register directly from memory.
instruct Repl2I_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
instruct Repl2I_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar zero to be vector
instruct Repl2I_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate2I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate4I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8I_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}
// Replicate long (8 byte) scalar to be vector
#ifdef _LP64
instruct Repl2L(vecX dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64

// Replicate long (8 byte) scalar immediate to be vector by loading from const table.
instruct Repl2L_imm(vecX dst, immL con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate long (8 byte) scalar zero to be vector
instruct Repl2L_zero(vecX dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL zero));
  format %{ "pxor $dst,$dst\t! replicate2L zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4L_zero(vecY dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}
// Replicate float (4 byte) scalar to be vector
instruct Repl2F(vecD dst, regF src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\t! replicate2F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F(vecX dst, regF src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\t! replicate4F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate float (4 byte) scalar zero to be vector
instruct Repl2F_zero(vecD dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF zero));
  format %{ "xorps $dst,$dst\t! replicate2F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F_zero(vecX dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF zero));
  format %{ "xorps $dst,$dst\t! replicate4F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8F_zero(vecY dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateF zero));
  format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %}
  ins_encode %{
    int vector_len = 1;
    __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate double (8 bytes) scalar to be vector
instruct Repl2D(vecX dst, regD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\t! replicate2D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate double (8 byte) scalar zero to be vector
instruct Repl2D_zero(vecX dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD zero));
  format %{ "xorpd $dst,$dst\t! replicate2D zero" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4D_zero(vecY dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateD zero));
  format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %}
  ins_encode %{
    int vector_len = 1;
    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}
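// The EVEX replicate forms use the AVX-512 vpbroadcast instructions.
// Byte and short variants require AVX512BW as well as AVX512VL
// (supports_avx512vlbw()), the dword/qword and float variants need
// only AVX512VL (supports_avx512vl()), and the full 512-bit forms are
// gated on UseAVX > 2 alone.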
replicate4B" %} 3758 ins_encode %{ 3759 int vector_len = 0; 3760 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 3761 %} 3762 ins_pipe( pipe_slow ); 3763 %} 3764 3765 instruct Repl8B_mem_evex(vecD dst, memory mem) %{ 3766 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 3767 match(Set dst (ReplicateB (LoadB mem))); 3768 format %{ "vpbroadcastb $dst,$mem\t! replicate8B" %} 3769 ins_encode %{ 3770 int vector_len = 0; 3771 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 3772 %} 3773 ins_pipe( pipe_slow ); 3774 %} 3775 3776 instruct Repl16B_evex(vecX dst, rRegI src) %{ 3777 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 3778 match(Set dst (ReplicateB src)); 3779 format %{ "vpbroadcastb $dst,$src\t! replicate16B" %} 3780 ins_encode %{ 3781 int vector_len = 0; 3782 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 3783 %} 3784 ins_pipe( pipe_slow ); 3785 %} 3786 3787 instruct Repl16B_mem_evex(vecX dst, memory mem) %{ 3788 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 3789 match(Set dst (ReplicateB (LoadB mem))); 3790 format %{ "vpbroadcastb $dst,$mem\t! replicate16B" %} 3791 ins_encode %{ 3792 int vector_len = 0; 3793 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 3794 %} 3795 ins_pipe( pipe_slow ); 3796 %} 3797 3798 instruct Repl32B_evex(vecY dst, rRegI src) %{ 3799 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); 3800 match(Set dst (ReplicateB src)); 3801 format %{ "vpbroadcastb $dst,$src\t! replicate32B" %} 3802 ins_encode %{ 3803 int vector_len = 1; 3804 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 3805 %} 3806 ins_pipe( pipe_slow ); 3807 %} 3808 3809 instruct Repl32B_mem_evex(vecY dst, memory mem) %{ 3810 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); 3811 match(Set dst (ReplicateB (LoadB mem))); 3812 format %{ "vpbroadcastb $dst,$mem\t! replicate32B" %} 3813 ins_encode %{ 3814 int vector_len = 1; 3815 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 3816 %} 3817 ins_pipe( pipe_slow ); 3818 %} 3819 3820 instruct Repl64B_evex(vecZ dst, rRegI src) %{ 3821 predicate(n->as_Vector()->length() == 64 && UseAVX > 2); 3822 match(Set dst (ReplicateB src)); 3823 format %{ "vpbroadcastb $dst,$src\t! upper replicate64B" %} 3824 ins_encode %{ 3825 int vector_len = 2; 3826 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 3827 %} 3828 ins_pipe( pipe_slow ); 3829 %} 3830 3831 instruct Repl64B_mem_evex(vecZ dst, memory mem) %{ 3832 predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512vlbw()); 3833 match(Set dst (ReplicateB (LoadB mem))); 3834 format %{ "vpbroadcastb $dst,$mem\t! replicate64B" %} 3835 ins_encode %{ 3836 int vector_len = 2; 3837 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 3838 %} 3839 ins_pipe( pipe_slow ); 3840 %} 3841 3842 instruct Repl16B_imm_evex(vecX dst, immI con) %{ 3843 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 3844 match(Set dst (ReplicateB con)); 3845 format %{ "movq $dst,[$constantaddress]\n\t" 3846 "vpbroadcastb $dst,$dst\t! 
replicate16B" %} 3847 ins_encode %{ 3848 int vector_len = 0; 3849 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3850 __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3851 %} 3852 ins_pipe( pipe_slow ); 3853 %} 3854 3855 instruct Repl32B_imm_evex(vecY dst, immI con) %{ 3856 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); 3857 match(Set dst (ReplicateB con)); 3858 format %{ "movq $dst,[$constantaddress]\n\t" 3859 "vpbroadcastb $dst,$dst\t! replicate32B" %} 3860 ins_encode %{ 3861 int vector_len = 1; 3862 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3863 __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3864 %} 3865 ins_pipe( pipe_slow ); 3866 %} 3867 3868 instruct Repl64B_imm_evex(vecZ dst, immI con) %{ 3869 predicate(n->as_Vector()->length() == 64 && UseAVX > 2); 3870 match(Set dst (ReplicateB con)); 3871 format %{ "movq $dst,[$constantaddress]\n\t" 3872 "vpbroadcastb $dst,$dst\t! upper replicate64B" %} 3873 ins_encode %{ 3874 int vector_len = 2; 3875 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3876 __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3877 %} 3878 ins_pipe( pipe_slow ); 3879 %} 3880 3881 instruct Repl64B_zero_evex(vecZ dst, immI0 zero) %{ 3882 predicate(n->as_Vector()->length() == 64 && UseAVX > 2); 3883 match(Set dst (ReplicateB zero)); 3884 format %{ "vpxor $dst k0,$dst,$dst\t! replicate64B zero" %} 3885 ins_encode %{ 3886 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 3887 int vector_len = 2; 3888 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3889 %} 3890 ins_pipe( fpu_reg_reg ); 3891 %} 3892 3893 instruct Repl4S_evex(vecD dst, rRegI src) %{ 3894 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw()); 3895 match(Set dst (ReplicateS src)); 3896 format %{ "vpbroadcastw $dst,$src\t! replicate4S" %} 3897 ins_encode %{ 3898 int vector_len = 0; 3899 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 3900 %} 3901 ins_pipe( pipe_slow ); 3902 %} 3903 3904 instruct Repl4S_mem_evex(vecD dst, memory mem) %{ 3905 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw()); 3906 match(Set dst (ReplicateS (LoadS mem))); 3907 format %{ "vpbroadcastw $dst,$mem\t! replicate4S" %} 3908 ins_encode %{ 3909 int vector_len = 0; 3910 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 3911 %} 3912 ins_pipe( pipe_slow ); 3913 %} 3914 3915 instruct Repl8S_evex(vecX dst, rRegI src) %{ 3916 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 3917 match(Set dst (ReplicateS src)); 3918 format %{ "vpbroadcastw $dst,$src\t! replicate8S" %} 3919 ins_encode %{ 3920 int vector_len = 0; 3921 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 3922 %} 3923 ins_pipe( pipe_slow ); 3924 %} 3925 3926 instruct Repl8S_mem_evex(vecX dst, memory mem) %{ 3927 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 3928 match(Set dst (ReplicateS (LoadS mem))); 3929 format %{ "vpbroadcastw $dst,$mem\t! 
replicate8S" %} 3930 ins_encode %{ 3931 int vector_len = 0; 3932 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 3933 %} 3934 ins_pipe( pipe_slow ); 3935 %} 3936 3937 instruct Repl16S_evex(vecY dst, rRegI src) %{ 3938 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 3939 match(Set dst (ReplicateS src)); 3940 format %{ "vpbroadcastw $dst,$src\t! replicate16S" %} 3941 ins_encode %{ 3942 int vector_len = 1; 3943 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 3944 %} 3945 ins_pipe( pipe_slow ); 3946 %} 3947 3948 instruct Repl16S_mem_evex(vecY dst, memory mem) %{ 3949 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 3950 match(Set dst (ReplicateS (LoadS mem))); 3951 format %{ "vpbroadcastw $dst,$mem\t! replicate16S" %} 3952 ins_encode %{ 3953 int vector_len = 1; 3954 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 3955 %} 3956 ins_pipe( pipe_slow ); 3957 %} 3958 3959 instruct Repl32S_evex(vecZ dst, rRegI src) %{ 3960 predicate(n->as_Vector()->length() == 32 && UseAVX > 2); 3961 match(Set dst (ReplicateS src)); 3962 format %{ "vpbroadcastw $dst,$src\t! replicate32S" %} 3963 ins_encode %{ 3964 int vector_len = 2; 3965 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 3966 %} 3967 ins_pipe( pipe_slow ); 3968 %} 3969 3970 instruct Repl32S_mem_evex(vecZ dst, memory mem) %{ 3971 predicate(n->as_Vector()->length() == 32 && UseAVX > 2); 3972 match(Set dst (ReplicateS (LoadS mem))); 3973 format %{ "vpbroadcastw $dst,$mem\t! replicate32S" %} 3974 ins_encode %{ 3975 int vector_len = 2; 3976 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 3977 %} 3978 ins_pipe( pipe_slow ); 3979 %} 3980 3981 instruct Repl8S_imm_evex(vecX dst, immI con) %{ 3982 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 3983 match(Set dst (ReplicateS con)); 3984 format %{ "movq $dst,[$constantaddress]\n\t" 3985 "vpbroadcastw $dst,$dst\t! replicate8S" %} 3986 ins_encode %{ 3987 int vector_len = 0; 3988 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3989 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3990 %} 3991 ins_pipe( pipe_slow ); 3992 %} 3993 3994 instruct Repl16S_imm_evex(vecY dst, immI con) %{ 3995 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 3996 match(Set dst (ReplicateS con)); 3997 format %{ "movq $dst,[$constantaddress]\n\t" 3998 "vpbroadcastw $dst,$dst\t! replicate16S" %} 3999 ins_encode %{ 4000 int vector_len = 1; 4001 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4002 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4003 %} 4004 ins_pipe( pipe_slow ); 4005 %} 4006 4007 instruct Repl32S_imm_evex(vecZ dst, immI con) %{ 4008 predicate(n->as_Vector()->length() == 32 && UseAVX > 2); 4009 match(Set dst (ReplicateS con)); 4010 format %{ "movq $dst,[$constantaddress]\n\t" 4011 "vpbroadcastw $dst,$dst\t! replicate32S" %} 4012 ins_encode %{ 4013 int vector_len = 2; 4014 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4015 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4016 %} 4017 ins_pipe( pipe_slow ); 4018 %} 4019 4020 instruct Repl32S_zero_evex(vecZ dst, immI0 zero) %{ 4021 predicate(n->as_Vector()->length() == 32 && UseAVX > 2); 4022 match(Set dst (ReplicateS zero)); 4023 format %{ "vpxor $dst k0,$dst,$dst\t! 
instruct Repl4I_evex(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI src));
  format %{ "vpbroadcastd $dst,$src\t! replicate4I" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "vpbroadcastd $dst,$mem\t! replicate4I" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_evex(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI src));
  format %{ "vpbroadcastd $dst,$src\t! replicate8I" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "vpbroadcastd $dst,$mem\t! replicate8I" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_evex(vecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateI src));
  format %{ "vpbroadcastd $dst,$src\t! replicate16I" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "vpbroadcastd $dst,$mem\t! replicate16I" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I_imm_evex(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t"
            "vpbroadcastd $dst,$dst\t! replicate4I" %}
  ins_encode %{
    int vector_len = 0;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_imm_evex(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
            "vpbroadcastd $dst,$dst\t! replicate8I" %}
  ins_encode %{
    int vector_len = 1;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_imm_evex(vecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate16I($con)\n\t"
            "vpbroadcastd $dst,$dst\t! replicate16I" %}
  ins_encode %{
    int vector_len = 2;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_zero_evex(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateI zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate16I zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}
replicate8I" %} 4116 ins_encode %{ 4117 int vector_len = 1; 4118 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4119 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4120 %} 4121 ins_pipe( pipe_slow ); 4122 %} 4123 4124 instruct Repl16I_imm_evex(vecZ dst, immI con) %{ 4125 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4126 match(Set dst (ReplicateI con)); 4127 format %{ "movq $dst,[$constantaddress]\t! replicate16I($con)\n\t" 4128 "vpbroadcastd $dst,$dst\t! replicate16I" %} 4129 ins_encode %{ 4130 int vector_len = 2; 4131 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4132 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4133 %} 4134 ins_pipe( pipe_slow ); 4135 %} 4136 4137 instruct Repl16I_zero_evex(vecZ dst, immI0 zero) %{ 4138 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4139 match(Set dst (ReplicateI zero)); 4140 format %{ "vpxor $dst k0,$dst,$dst\t! replicate16I zero" %} 4141 ins_encode %{ 4142 // Use vxorpd since AVX does not have vpxor for 512-bit (AVX2 will have it). 4143 int vector_len = 2; 4144 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4145 %} 4146 ins_pipe( fpu_reg_reg ); 4147 %} 4148 4149 // Replicate long (8 byte) scalar to be vector 4150 #ifdef _LP64 4151 instruct Repl4L_evex(vecY dst, rRegL src) %{ 4152 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4153 match(Set dst (ReplicateL src)); 4154 format %{ "vpbroadcastq $dst,$src\t! replicate4L" %} 4155 ins_encode %{ 4156 int vector_len = 1; 4157 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); 4158 %} 4159 ins_pipe( pipe_slow ); 4160 %} 4161 4162 instruct Repl8L_evex(vecZ dst, rRegL src) %{ 4163 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4164 match(Set dst (ReplicateL src)); 4165 format %{ "vpbroadcastq $dst,$src\t! replicate8L" %} 4166 ins_encode %{ 4167 int vector_len = 2; 4168 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); 4169 %} 4170 ins_pipe( pipe_slow ); 4171 %} 4172 #else // _LP64 4173 instruct Repl4L_evex(vecY dst, eRegL src, regD tmp) %{ 4174 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4175 match(Set dst (ReplicateL src)); 4176 effect(TEMP dst, USE src, TEMP tmp); 4177 format %{ "movdl $dst,$src.lo\n\t" 4178 "movdl $tmp,$src.hi\n\t" 4179 "punpckldq $dst,$tmp\n\t" 4180 "vpbroadcastq $dst,$dst\t! replicate4L" %} 4181 ins_encode %{ 4182 int vector_len = 1; 4183 __ movdl($dst$$XMMRegister, $src$$Register); 4184 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4185 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4186 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4187 %} 4188 ins_pipe( pipe_slow ); 4189 %} 4190 4191 instruct Repl8L_evex(vecZ dst, eRegL src, regD tmp) %{ 4192 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4193 match(Set dst (ReplicateL src)); 4194 effect(TEMP dst, USE src, TEMP tmp); 4195 format %{ "movdl $dst,$src.lo\n\t" 4196 "movdl $tmp,$src.hi\n\t" 4197 "punpckldq $dst,$tmp\n\t" 4198 "vpbroadcastq $dst,$dst\t! 
replicate8L" %} 4199 ins_encode %{ 4200 int vector_len = 2; 4201 __ movdl($dst$$XMMRegister, $src$$Register); 4202 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4203 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4204 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4205 %} 4206 ins_pipe( pipe_slow ); 4207 %} 4208 #endif // _LP64 4209 4210 instruct Repl4L_imm_evex(vecY dst, immL con) %{ 4211 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4212 match(Set dst (ReplicateL con)); 4213 format %{ "movq $dst,[$constantaddress]\n\t" 4214 "vpbroadcastq $dst,$dst\t! replicate4L" %} 4215 ins_encode %{ 4216 int vector_len = 1; 4217 __ movq($dst$$XMMRegister, $constantaddress($con)); 4218 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4219 %} 4220 ins_pipe( pipe_slow ); 4221 %} 4222 4223 instruct Repl8L_imm_evex(vecZ dst, immL con) %{ 4224 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4225 match(Set dst (ReplicateL con)); 4226 format %{ "movq $dst,[$constantaddress]\n\t" 4227 "vpbroadcastq $dst,$dst\t! replicate8L" %} 4228 ins_encode %{ 4229 int vector_len = 2; 4230 __ movq($dst$$XMMRegister, $constantaddress($con)); 4231 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4232 %} 4233 ins_pipe( pipe_slow ); 4234 %} 4235 4236 instruct Repl2L_mem_evex(vecX dst, memory mem) %{ 4237 predicate(n->as_Vector()->length() == 2 && VM_Version::supports_avx512vl()); 4238 match(Set dst (ReplicateL (LoadL mem))); 4239 format %{ "vpbroadcastd $dst,$mem\t! replicate2L" %} 4240 ins_encode %{ 4241 int vector_len = 0; 4242 __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4243 %} 4244 ins_pipe( pipe_slow ); 4245 %} 4246 4247 instruct Repl4L_mem_evex(vecY dst, memory mem) %{ 4248 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4249 match(Set dst (ReplicateL (LoadL mem))); 4250 format %{ "vpbroadcastd $dst,$mem\t! replicate4L" %} 4251 ins_encode %{ 4252 int vector_len = 1; 4253 __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4254 %} 4255 ins_pipe( pipe_slow ); 4256 %} 4257 4258 instruct Repl8L_mem_evex(vecZ dst, memory mem) %{ 4259 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4260 match(Set dst (ReplicateL (LoadL mem))); 4261 format %{ "vpbroadcastd $dst,$mem\t! replicate8L" %} 4262 ins_encode %{ 4263 int vector_len = 2; 4264 __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4265 %} 4266 ins_pipe( pipe_slow ); 4267 %} 4268 4269 instruct Repl8L_zero_evex(vecZ dst, immL0 zero) %{ 4270 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4271 match(Set dst (ReplicateL zero)); 4272 format %{ "vpxor $dst k0,$dst,$dst\t! replicate8L zero" %} 4273 ins_encode %{ 4274 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 4275 int vector_len = 2; 4276 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4277 %} 4278 ins_pipe( fpu_reg_reg ); 4279 %} 4280 4281 instruct Repl8F_evex(vecY dst, regF src) %{ 4282 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4283 match(Set dst (ReplicateF src)); 4284 format %{ "vbroadcastss $dst,$src\t! 
replicate8F" %} 4285 ins_encode %{ 4286 int vector_len = 1; 4287 __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4288 %} 4289 ins_pipe( pipe_slow ); 4290 %} 4291 4292 instruct Repl8F_mem_evex(vecY dst, memory mem) %{ 4293 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4294 match(Set dst (ReplicateF (LoadF mem))); 4295 format %{ "vbroadcastss $dst,$mem\t! replicate8F" %} 4296 ins_encode %{ 4297 int vector_len = 1; 4298 __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); 4299 %} 4300 ins_pipe( pipe_slow ); 4301 %} 4302 4303 instruct Repl16F_evex(vecZ dst, regF src) %{ 4304 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4305 match(Set dst (ReplicateF src)); 4306 format %{ "vbroadcastss $dst,$src\t! replicate16F" %} 4307 ins_encode %{ 4308 int vector_len = 2; 4309 __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4310 %} 4311 ins_pipe( pipe_slow ); 4312 %} 4313 4314 instruct Repl16F_mem_evex(vecZ dst, memory mem) %{ 4315 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4316 match(Set dst (ReplicateF (LoadF mem))); 4317 format %{ "vbroadcastss $dst,$mem\t! replicate16F" %} 4318 ins_encode %{ 4319 int vector_len = 2; 4320 __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); 4321 %} 4322 ins_pipe( pipe_slow ); 4323 %} 4324 4325 instruct Repl16F_zero_evex(vecZ dst, immF0 zero) %{ 4326 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4327 match(Set dst (ReplicateF zero)); 4328 format %{ "vxorps $dst k0,$dst,$dst\t! replicate16F zero" %} 4329 ins_encode %{ 4330 int vector_len = 2; 4331 __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4332 %} 4333 ins_pipe( fpu_reg_reg ); 4334 %} 4335 4336 instruct Repl4D_evex(vecY dst, regD src) %{ 4337 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4338 match(Set dst (ReplicateD src)); 4339 format %{ "vbroadcastsd $dst,$src\t! replicate4D" %} 4340 ins_encode %{ 4341 int vector_len = 1; 4342 __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4343 %} 4344 ins_pipe( pipe_slow ); 4345 %} 4346 4347 instruct Repl4D_mem_evex(vecY dst, memory mem) %{ 4348 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4349 match(Set dst (ReplicateD (LoadD mem))); 4350 format %{ "vbroadcastsd $dst,$mem\t! replicate4D" %} 4351 ins_encode %{ 4352 int vector_len = 1; 4353 __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); 4354 %} 4355 ins_pipe( pipe_slow ); 4356 %} 4357 4358 instruct Repl8D_evex(vecZ dst, regD src) %{ 4359 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4360 match(Set dst (ReplicateD src)); 4361 format %{ "vbroadcastsd $dst,$src\t! replicate8D" %} 4362 ins_encode %{ 4363 int vector_len = 2; 4364 __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4365 %} 4366 ins_pipe( pipe_slow ); 4367 %} 4368 4369 instruct Repl8D_mem_evex(vecZ dst, memory mem) %{ 4370 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4371 match(Set dst (ReplicateD (LoadD mem))); 4372 format %{ "vbroadcastsd $dst,$mem\t! replicate8D" %} 4373 ins_encode %{ 4374 int vector_len = 2; 4375 __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); 4376 %} 4377 ins_pipe( pipe_slow ); 4378 %} 4379 4380 instruct Repl8D_zero_evex(vecZ dst, immD0 zero) %{ 4381 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4382 match(Set dst (ReplicateD zero)); 4383 format %{ "vxorpd $dst k0,$dst,$dst,vect512\t! 
replicate8D zero" %} 4384 ins_encode %{ 4385 int vector_len = 2; 4386 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4387 %} 4388 ins_pipe( fpu_reg_reg ); 4389 %} 4390 4391 // ====================REDUCTION ARITHMETIC======================================= 4392 4393 instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4394 predicate(UseSSE > 2 && UseAVX == 0); 4395 match(Set dst (AddReductionVI src1 src2)); 4396 effect(TEMP tmp2, TEMP tmp); 4397 format %{ "movdqu $tmp2,$src2\n\t" 4398 "phaddd $tmp2,$tmp2\n\t" 4399 "movd $tmp,$src1\n\t" 4400 "paddd $tmp,$tmp2\n\t" 4401 "movd $dst,$tmp\t! add reduction2I" %} 4402 ins_encode %{ 4403 __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister); 4404 __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); 4405 __ movdl($tmp$$XMMRegister, $src1$$Register); 4406 __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister); 4407 __ movdl($dst$$Register, $tmp$$XMMRegister); 4408 %} 4409 ins_pipe( pipe_slow ); 4410 %} 4411 4412 instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4413 predicate(UseAVX > 0 && UseAVX < 3); 4414 match(Set dst (AddReductionVI src1 src2)); 4415 effect(TEMP tmp, TEMP tmp2); 4416 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4417 "movd $tmp2,$src1\n\t" 4418 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4419 "movd $dst,$tmp2\t! add reduction2I" %} 4420 ins_encode %{ 4421 int vector_len = 0; 4422 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4423 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4424 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 4425 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4426 %} 4427 ins_pipe( pipe_slow ); 4428 %} 4429 4430 instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4431 predicate(UseAVX > 2); 4432 match(Set dst (AddReductionVI src1 src2)); 4433 effect(TEMP tmp, TEMP tmp2); 4434 format %{ "pshufd $tmp2,$src2,0x1\n\t" 4435 "vpaddd $tmp,$src2,$tmp2\n\t" 4436 "movd $tmp2,$src1\n\t" 4437 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4438 "movd $dst,$tmp2\t! add reduction2I" %} 4439 ins_encode %{ 4440 int vector_len = 0; 4441 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 4442 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4443 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4444 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4445 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4446 %} 4447 ins_pipe( pipe_slow ); 4448 %} 4449 4450 instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4451 predicate(UseSSE > 2 && UseAVX == 0); 4452 match(Set dst (AddReductionVI src1 src2)); 4453 effect(TEMP tmp2, TEMP tmp); 4454 format %{ "movdqu $tmp2,$src2\n\t" 4455 "phaddd $tmp2,$tmp2\n\t" 4456 "phaddd $tmp2,$tmp2\n\t" 4457 "movd $tmp,$src1\n\t" 4458 "paddd $tmp,$tmp2\n\t" 4459 "movd $dst,$tmp\t! 
add reduction4I" %} 4460 ins_encode %{ 4461 __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister); 4462 __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); 4463 __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); 4464 __ movdl($tmp$$XMMRegister, $src1$$Register); 4465 __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister); 4466 __ movdl($dst$$Register, $tmp$$XMMRegister); 4467 %} 4468 ins_pipe( pipe_slow ); 4469 %} 4470 4471 instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4472 predicate(UseAVX > 0 && UseAVX < 3); 4473 match(Set dst (AddReductionVI src1 src2)); 4474 effect(TEMP tmp, TEMP tmp2); 4475 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4476 "vphaddd $tmp,$tmp,$tmp2\n\t" 4477 "movd $tmp2,$src1\n\t" 4478 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4479 "movd $dst,$tmp2\t! add reduction4I" %} 4480 ins_encode %{ 4481 int vector_len = 0; 4482 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4483 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4484 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4485 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 4486 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4487 %} 4488 ins_pipe( pipe_slow ); 4489 %} 4490 4491 instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4492 predicate(UseAVX > 2); 4493 match(Set dst (AddReductionVI src1 src2)); 4494 effect(TEMP tmp, TEMP tmp2); 4495 format %{ "pshufd $tmp2,$src2,0xE\n\t" 4496 "vpaddd $tmp,$src2,$tmp2\n\t" 4497 "pshufd $tmp2,$tmp,0x1\n\t" 4498 "vpaddd $tmp,$tmp,$tmp2\n\t" 4499 "movd $tmp2,$src1\n\t" 4500 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4501 "movd $dst,$tmp2\t! add reduction4I" %} 4502 ins_encode %{ 4503 int vector_len = 0; 4504 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 4505 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4506 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4507 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4508 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4509 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4510 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4511 %} 4512 ins_pipe( pipe_slow ); 4513 %} 4514 4515 instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 4516 predicate(UseAVX > 0 && UseAVX < 3); 4517 match(Set dst (AddReductionVI src1 src2)); 4518 effect(TEMP tmp, TEMP tmp2); 4519 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4520 "vphaddd $tmp,$tmp,$tmp2\n\t" 4521 "vextracti128 $tmp2,$tmp\n\t" 4522 "vpaddd $tmp,$tmp,$tmp2\n\t" 4523 "movd $tmp2,$src1\n\t" 4524 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4525 "movd $dst,$tmp2\t! 
add reduction8I" %} 4526 ins_encode %{ 4527 int vector_len = 1; 4528 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4529 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4530 __ vextracti128h($tmp2$$XMMRegister, $tmp$$XMMRegister); 4531 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4532 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4533 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4534 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4535 %} 4536 ins_pipe( pipe_slow ); 4537 %} 4538 4539 instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 4540 predicate(UseAVX > 2); 4541 match(Set dst (AddReductionVI src1 src2)); 4542 effect(TEMP tmp, TEMP tmp2); 4543 format %{ "vextracti128 $tmp,$src2\n\t" 4544 "vpaddd $tmp,$tmp,$src2\n\t" 4545 "pshufd $tmp2,$tmp,0xE\n\t" 4546 "vpaddd $tmp,$tmp,$tmp2\n\t" 4547 "pshufd $tmp2,$tmp,0x1\n\t" 4548 "vpaddd $tmp,$tmp,$tmp2\n\t" 4549 "movd $tmp2,$src1\n\t" 4550 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4551 "movd $dst,$tmp2\t! add reduction8I" %} 4552 ins_encode %{ 4553 int vector_len = 0; 4554 __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister); 4555 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 4556 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 4557 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4558 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4559 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4560 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4561 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4562 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4563 %} 4564 ins_pipe( pipe_slow ); 4565 %} 4566 4567 instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 4568 predicate(UseAVX > 2); 4569 match(Set dst (AddReductionVI src1 src2)); 4570 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 4571 format %{ "vextracti64x4 $tmp3,$src2\n\t" 4572 "vpaddd $tmp3,$tmp3,$src2\n\t" 4573 "vextracti128 $tmp,$tmp3\n\t" 4574 "vpaddd $tmp,$tmp,$tmp3\n\t" 4575 "pshufd $tmp2,$tmp,0xE\n\t" 4576 "vpaddd $tmp,$tmp,$tmp2\n\t" 4577 "pshufd $tmp2,$tmp,0x1\n\t" 4578 "vpaddd $tmp,$tmp,$tmp2\n\t" 4579 "movd $tmp2,$src1\n\t" 4580 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4581 "movd $dst,$tmp2\t! 
mul reduction16I" %} 4582 ins_encode %{ 4583 __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister); 4584 __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 4585 __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister); 4586 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 4587 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 4588 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4589 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4590 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4591 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4592 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4593 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4594 %} 4595 ins_pipe( pipe_slow ); 4596 %} 4597 4598 #ifdef _LP64 4599 instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ 4600 predicate(UseAVX > 2); 4601 match(Set dst (AddReductionVL src1 src2)); 4602 effect(TEMP tmp, TEMP tmp2); 4603 format %{ "pshufd $tmp2,$src2,0xE\n\t" 4604 "vpaddq $tmp,$src2,$tmp2\n\t" 4605 "movdq $tmp2,$src1\n\t" 4606 "vpaddq $tmp2,$tmp,$tmp2\n\t" 4607 "movdq $dst,$tmp2\t! add reduction2L" %} 4608 ins_encode %{ 4609 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 4610 __ vpaddq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 4611 __ movdq($tmp2$$XMMRegister, $src1$$Register); 4612 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4613 __ movdq($dst$$Register, $tmp2$$XMMRegister); 4614 %} 4615 ins_pipe( pipe_slow ); 4616 %} 4617 4618 instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ 4619 predicate(UseAVX > 2); 4620 match(Set dst (AddReductionVL src1 src2)); 4621 effect(TEMP tmp, TEMP tmp2); 4622 format %{ "vextracti64x2 $tmp,$src2, 0x1\n\t" 4623 "vpaddq $tmp2,$tmp,$src2\n\t" 4624 "pshufd $tmp,$tmp2,0xE\n\t" 4625 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4626 "movdq $tmp,$src1\n\t" 4627 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4628 "movdq $dst,$tmp2\t! add reduction4L" %} 4629 ins_encode %{ 4630 __ vextracti64x2h($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); 4631 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 4632 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 4633 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4634 __ movdq($tmp$$XMMRegister, $src1$$Register); 4635 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4636 __ movdq($dst$$Register, $tmp2$$XMMRegister); 4637 %} 4638 ins_pipe( pipe_slow ); 4639 %} 4640 4641 instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ 4642 predicate(UseAVX > 2); 4643 match(Set dst (AddReductionVL src1 src2)); 4644 effect(TEMP tmp, TEMP tmp2); 4645 format %{ "vextracti64x4 $tmp2,$src2\n\t" 4646 "vpaddq $tmp2,$tmp2,$src2\n\t" 4647 "vextracti128 $tmp,$tmp2\n\t" 4648 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4649 "pshufd $tmp,$tmp2,0xE\n\t" 4650 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4651 "movdq $tmp,$src1\n\t" 4652 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4653 "movdq $dst,$tmp2\t! 
add reduction8L" %} 4654 ins_encode %{ 4655 __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister); 4656 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 4657 __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister); 4658 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4659 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 4660 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4661 __ movdq($tmp$$XMMRegister, $src1$$Register); 4662 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4663 __ movdq($dst$$Register, $tmp2$$XMMRegister); 4664 %} 4665 ins_pipe( pipe_slow ); 4666 %} 4667 #endif 4668 4669 instruct rsadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{ 4670 predicate(UseSSE >= 1 && UseAVX == 0); 4671 match(Set dst (AddReductionVF src1 src2)); 4672 effect(TEMP tmp, TEMP tmp2); 4673 format %{ "movdqu $tmp,$src1\n\t" 4674 "addss $tmp,$src2\n\t" 4675 "pshufd $tmp2,$src2,0x01\n\t" 4676 "addss $tmp,$tmp2\n\t" 4677 "movdqu $dst,$tmp\t! add reduction2F" %} 4678 ins_encode %{ 4679 __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); 4680 __ addss($tmp$$XMMRegister, $src2$$XMMRegister); 4681 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01); 4682 __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister); 4683 __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister); 4684 %} 4685 ins_pipe( pipe_slow ); 4686 %} 4687 4688 instruct rvadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{ 4689 predicate(UseAVX > 0); 4690 match(Set dst (AddReductionVF src1 src2)); 4691 effect(TEMP tmp2, TEMP tmp); 4692 format %{ "vaddss $tmp2,$src1,$src2\n\t" 4693 "pshufd $tmp,$src2,0x01\n\t" 4694 "vaddss $dst,$tmp2,$tmp\t! add reduction2F" %} 4695 ins_encode %{ 4696 __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 4697 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4698 __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4699 %} 4700 ins_pipe( pipe_slow ); 4701 %} 4702 4703 instruct rsadd4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{ 4704 predicate(UseSSE >= 1 && UseAVX == 0); 4705 match(Set dst (AddReductionVF src1 src2)); 4706 effect(TEMP tmp, TEMP tmp2); 4707 format %{ "movdqu $tmp,$src1\n\t" 4708 "addss $tmp,$src2\n\t" 4709 "pshufd $tmp2,$src2,0x01\n\t" 4710 "addss $tmp,$tmp2\n\t" 4711 "pshufd $tmp2,$src2,0x02\n\t" 4712 "addss $tmp,$tmp2\n\t" 4713 "pshufd $tmp2,$src2,0x03\n\t" 4714 "addss $tmp,$tmp2\n\t" 4715 "movdqu $dst,$tmp\t! 
add reduction4F" %} 4716 ins_encode %{ 4717 __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); 4718 __ addss($tmp$$XMMRegister, $src2$$XMMRegister); 4719 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01); 4720 __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister); 4721 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02); 4722 __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister); 4723 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03); 4724 __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister); 4725 __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister); 4726 %} 4727 ins_pipe( pipe_slow ); 4728 %} 4729 4730 instruct rvadd4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{ 4731 predicate(UseAVX > 0); 4732 match(Set dst (AddReductionVF src1 src2)); 4733 effect(TEMP tmp, TEMP tmp2); 4734 format %{ "vaddss $tmp2,$src1,$src2\n\t" 4735 "pshufd $tmp,$src2,0x01\n\t" 4736 "vaddss $tmp2,$tmp2,$tmp\n\t" 4737 "pshufd $tmp,$src2,0x02\n\t" 4738 "vaddss $tmp2,$tmp2,$tmp\n\t" 4739 "pshufd $tmp,$src2,0x03\n\t" 4740 "vaddss $dst,$tmp2,$tmp\t! add reduction4F" %} 4741 ins_encode %{ 4742 __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 4743 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4744 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4745 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 4746 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4747 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 4748 __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4749 %} 4750 ins_pipe( pipe_slow ); 4751 %} 4752 4753 instruct radd8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{ 4754 predicate(UseAVX > 0); 4755 match(Set dst (AddReductionVF src1 src2)); 4756 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 4757 format %{ "vaddss $tmp2,$src1,$src2\n\t" 4758 "pshufd $tmp,$src2,0x01\n\t" 4759 "vaddss $tmp2,$tmp2,$tmp\n\t" 4760 "pshufd $tmp,$src2,0x02\n\t" 4761 "vaddss $tmp2,$tmp2,$tmp\n\t" 4762 "pshufd $tmp,$src2,0x03\n\t" 4763 "vaddss $tmp2,$tmp2,$tmp\n\t" 4764 "vextractf128 $tmp3,$src2\n\t" 4765 "vaddss $tmp2,$tmp2,$tmp3\n\t" 4766 "pshufd $tmp,$tmp3,0x01\n\t" 4767 "vaddss $tmp2,$tmp2,$tmp\n\t" 4768 "pshufd $tmp,$tmp3,0x02\n\t" 4769 "vaddss $tmp2,$tmp2,$tmp\n\t" 4770 "pshufd $tmp,$tmp3,0x03\n\t" 4771 "vaddss $dst,$tmp2,$tmp\t! 
add reduction8F" %} 4772 ins_encode %{ 4773 __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 4774 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4775 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4776 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 4777 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4778 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 4779 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4780 __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister); 4781 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 4782 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); 4783 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4784 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); 4785 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4786 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); 4787 __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4788 %} 4789 ins_pipe( pipe_slow ); 4790 %} 4791 4792 instruct radd16F_reduction_reg(regF dst, regF src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 4793 predicate(UseAVX > 2); 4794 match(Set dst (AddReductionVF src1 src2)); 4795 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 4796 format %{ "vaddss $tmp2,$src1,$src2\n\t" 4797 "pshufd $tmp,$src2,0x01\n\t" 4798 "vaddss $tmp2,$tmp2,$tmp\n\t" 4799 "pshufd $tmp,$src2,0x02\n\t" 4800 "vaddss $tmp2,$tmp2,$tmp\n\t" 4801 "pshufd $tmp,$src2,0x03\n\t" 4802 "vaddss $tmp2,$tmp2,$tmp\n\t" 4803 "vextractf64x2 $tmp3,$src2, 0x1\n\t" 4804 "vaddss $tmp2,$tmp2,$tmp3\n\t" 4805 "pshufd $tmp,$tmp3,0x01\n\t" 4806 "vaddss $tmp2,$tmp2,$tmp\n\t" 4807 "pshufd $tmp,$tmp3,0x02\n\t" 4808 "vaddss $tmp2,$tmp2,$tmp\n\t" 4809 "pshufd $tmp,$tmp3,0x03\n\t" 4810 "vaddss $tmp2,$tmp2,$tmp\n\t" 4811 "vextractf64x2 $tmp3,$src2, 0x2\n\t" 4812 "vaddss $tmp2,$tmp2,$tmp3\n\t" 4813 "pshufd $tmp,$tmp3,0x01\n\t" 4814 "vaddss $tmp2,$tmp2,$tmp\n\t" 4815 "pshufd $tmp,$tmp3,0x02\n\t" 4816 "vaddss $tmp2,$tmp2,$tmp\n\t" 4817 "pshufd $tmp,$tmp3,0x03\n\t" 4818 "vaddss $tmp2,$tmp2,$tmp\n\t" 4819 "vextractf64x2 $tmp3,$src2, 0x3\n\t" 4820 "vaddss $tmp2,$tmp2,$tmp3\n\t" 4821 "pshufd $tmp,$tmp3,0x01\n\t" 4822 "vaddss $tmp2,$tmp2,$tmp\n\t" 4823 "pshufd $tmp,$tmp3,0x02\n\t" 4824 "vaddss $tmp2,$tmp2,$tmp\n\t" 4825 "pshufd $tmp,$tmp3,0x03\n\t" 4826 "vaddss $dst,$tmp2,$tmp\t! 
add reduction16F" %} 4827 ins_encode %{ 4828 __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 4829 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4830 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4831 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 4832 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4833 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 4834 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4835 __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1); 4836 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 4837 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); 4838 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4839 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); 4840 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4841 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); 4842 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4843 __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2); 4844 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 4845 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); 4846 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4847 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); 4848 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4849 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); 4850 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4851 __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3); 4852 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 4853 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); 4854 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4855 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); 4856 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4857 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); 4858 __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4859 %} 4860 ins_pipe( pipe_slow ); 4861 %} 4862 4863 instruct rsadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{ 4864 predicate(UseSSE >= 1 && UseAVX == 0); 4865 match(Set dst (AddReductionVD src1 src2)); 4866 effect(TEMP tmp, TEMP dst); 4867 format %{ "movdqu $tmp,$src1\n\t" 4868 "addsd $tmp,$src2\n\t" 4869 "pshufd $dst,$src2,0xE\n\t" 4870 "addsd $dst,$tmp\t! add reduction2D" %} 4871 ins_encode %{ 4872 __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); 4873 __ addsd($tmp$$XMMRegister, $src2$$XMMRegister); 4874 __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE); 4875 __ addsd($dst$$XMMRegister, $tmp$$XMMRegister); 4876 %} 4877 ins_pipe( pipe_slow ); 4878 %} 4879 4880 instruct rvadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{ 4881 predicate(UseAVX > 0); 4882 match(Set dst (AddReductionVD src1 src2)); 4883 effect(TEMP tmp, TEMP tmp2); 4884 format %{ "vaddsd $tmp2,$src1,$src2\n\t" 4885 "pshufd $tmp,$src2,0xE\n\t" 4886 "vaddsd $dst,$tmp2,$tmp\t! 
add reduction2D" %} 4887 ins_encode %{ 4888 __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 4889 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 4890 __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4891 %} 4892 ins_pipe( pipe_slow ); 4893 %} 4894 4895 instruct rvadd4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{ 4896 predicate(UseAVX > 0); 4897 match(Set dst (AddReductionVD src1 src2)); 4898 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 4899 format %{ "vaddsd $tmp2,$src1,$src2\n\t" 4900 "pshufd $tmp,$src2,0xE\n\t" 4901 "vaddsd $tmp2,$tmp2,$tmp\n\t" 4902 "vextractf128 $tmp3,$src2\n\t" 4903 "vaddsd $tmp2,$tmp2,$tmp3\n\t" 4904 "pshufd $tmp,$tmp3,0xE\n\t" 4905 "vaddsd $dst,$tmp2,$tmp\t! add reduction4D" %} 4906 ins_encode %{ 4907 __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 4908 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 4909 __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4910 __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister); 4911 __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 4912 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); 4913 __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4914 %} 4915 ins_pipe( pipe_slow ); 4916 %} 4917 4918 instruct rvadd8D_reduction_reg(regD dst, regD src1, vecZ src2, regD tmp, regD tmp2, regD tmp3) %{ 4919 predicate(UseAVX > 2); 4920 match(Set dst (AddReductionVD src1 src2)); 4921 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 4922 format %{ "vaddsd $tmp2,$src1,$src2\n\t" 4923 "pshufd $tmp,$src2,0xE\n\t" 4924 "vaddsd $tmp2,$tmp2,$tmp\n\t" 4925 "vextractf64x2 $tmp3,$src2, 0x1\n\t" 4926 "vaddsd $tmp2,$tmp2,$tmp3\n\t" 4927 "pshufd $tmp,$tmp3,0xE\n\t" 4928 "vaddsd $tmp2,$tmp2,$tmp\n\t" 4929 "vextractf64x2 $tmp3,$src2, 0x2\n\t" 4930 "vaddsd $tmp2,$tmp2,$tmp3\n\t" 4931 "pshufd $tmp,$tmp3,0xE\n\t" 4932 "vaddsd $tmp2,$tmp2,$tmp\n\t" 4933 "vextractf64x2 $tmp3,$src2, 0x3\n\t" 4934 "vaddsd $tmp2,$tmp2,$tmp3\n\t" 4935 "pshufd $tmp,$tmp3,0xE\n\t" 4936 "vaddsd $dst,$tmp2,$tmp\t! 
add reduction8D" %} 4937 ins_encode %{ 4938 __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 4939 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 4940 __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4941 __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1); 4942 __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 4943 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); 4944 __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4945 __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2); 4946 __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 4947 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); 4948 __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4949 __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3); 4950 __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 4951 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); 4952 __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4953 %} 4954 ins_pipe( pipe_slow ); 4955 %} 4956 4957 instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4958 predicate(UseSSE > 3 && UseAVX == 0); 4959 match(Set dst (MulReductionVI src1 src2)); 4960 effect(TEMP tmp, TEMP tmp2); 4961 format %{ "pshufd $tmp2,$src2,0x1\n\t" 4962 "pmulld $tmp2,$src2\n\t" 4963 "movd $tmp,$src1\n\t" 4964 "pmulld $tmp2,$tmp\n\t" 4965 "movd $dst,$tmp2\t! mul reduction2I" %} 4966 ins_encode %{ 4967 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 4968 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 4969 __ movdl($tmp$$XMMRegister, $src1$$Register); 4970 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 4971 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4972 %} 4973 ins_pipe( pipe_slow ); 4974 %} 4975 4976 instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4977 predicate(UseAVX > 0); 4978 match(Set dst (MulReductionVI src1 src2)); 4979 effect(TEMP tmp, TEMP tmp2); 4980 format %{ "pshufd $tmp2,$src2,0x1\n\t" 4981 "vpmulld $tmp,$src2,$tmp2\n\t" 4982 "movd $tmp2,$src1\n\t" 4983 "vpmulld $tmp2,$tmp,$tmp2\n\t" 4984 "movd $dst,$tmp2\t! mul reduction2I" %} 4985 ins_encode %{ 4986 int vector_len = 0; 4987 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 4988 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4989 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4990 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4991 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4992 %} 4993 ins_pipe( pipe_slow ); 4994 %} 4995 4996 instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4997 predicate(UseSSE > 3 && UseAVX == 0); 4998 match(Set dst (MulReductionVI src1 src2)); 4999 effect(TEMP tmp, TEMP tmp2); 5000 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5001 "pmulld $tmp2,$src2\n\t" 5002 "pshufd $tmp,$tmp2,0x1\n\t" 5003 "pmulld $tmp2,$tmp\n\t" 5004 "movd $tmp,$src1\n\t" 5005 "pmulld $tmp2,$tmp\n\t" 5006 "movd $dst,$tmp2\t! 
mul reduction4I" %} 5007 ins_encode %{ 5008 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5009 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 5010 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1); 5011 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5012 __ movdl($tmp$$XMMRegister, $src1$$Register); 5013 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5014 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5015 %} 5016 ins_pipe( pipe_slow ); 5017 %} 5018 5019 instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 5020 predicate(UseAVX > 0); 5021 match(Set dst (MulReductionVI src1 src2)); 5022 effect(TEMP tmp, TEMP tmp2); 5023 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5024 "vpmulld $tmp,$src2,$tmp2\n\t" 5025 "pshufd $tmp2,$tmp,0x1\n\t" 5026 "vpmulld $tmp,$tmp,$tmp2\n\t" 5027 "movd $tmp2,$src1\n\t" 5028 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5029 "movd $dst,$tmp2\t! mul reduction4I" %} 5030 ins_encode %{ 5031 int vector_len = 0; 5032 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5033 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5034 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5035 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5036 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5037 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5038 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5039 %} 5040 ins_pipe( pipe_slow ); 5041 %} 5042 5043 instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 5044 predicate(UseAVX > 0); 5045 match(Set dst (MulReductionVI src1 src2)); 5046 effect(TEMP tmp, TEMP tmp2); 5047 format %{ "vextracti128 $tmp,$src2\n\t" 5048 "vpmulld $tmp,$tmp,$src2\n\t" 5049 "pshufd $tmp2,$tmp,0xE\n\t" 5050 "vpmulld $tmp,$tmp,$tmp2\n\t" 5051 "pshufd $tmp2,$tmp,0x1\n\t" 5052 "vpmulld $tmp,$tmp,$tmp2\n\t" 5053 "movd $tmp2,$src1\n\t" 5054 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5055 "movd $dst,$tmp2\t! mul reduction8I" %} 5056 ins_encode %{ 5057 int vector_len = 0; 5058 __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister); 5059 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 5060 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5061 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5062 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5063 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5064 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5065 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5066 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5067 %} 5068 ins_pipe( pipe_slow ); 5069 %} 5070 5071 instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 5072 predicate(UseAVX > 2); 5073 match(Set dst (MulReductionVI src1 src2)); 5074 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5075 format %{ "vextracti64x4 $tmp3,$src2\n\t" 5076 "vpmulld $tmp3,$tmp3,$src2\n\t" 5077 "vextracti128 $tmp,$tmp3\n\t" 5078 "vpmulld $tmp,$tmp,$src2\n\t" 5079 "pshufd $tmp2,$tmp,0xE\n\t" 5080 "vpmulld $tmp,$tmp,$tmp2\n\t" 5081 "pshufd $tmp2,$tmp,0x1\n\t" 5082 "vpmulld $tmp,$tmp,$tmp2\n\t" 5083 "movd $tmp2,$src1\n\t" 5084 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5085 "movd $dst,$tmp2\t! 
mul reduction16I" %} 5086 ins_encode %{ 5087 __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister); 5088 __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 5089 __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister); 5090 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 5091 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5092 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5093 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5094 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5095 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5096 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5097 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5098 %} 5099 ins_pipe( pipe_slow ); 5100 %} 5101 5102 #ifdef _LP64 5103 instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ 5104 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5105 match(Set dst (MulReductionVL src1 src2)); 5106 effect(TEMP tmp, TEMP tmp2); 5107 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5108 "vpmullq $tmp,$src2,$tmp2\n\t" 5109 "movdq $tmp2,$src1\n\t" 5110 "vpmullq $tmp2,$tmp,$tmp2\n\t" 5111 "movdq $dst,$tmp2\t! mul reduction2L" %} 5112 ins_encode %{ 5113 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5114 __ vpmullq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 5115 __ movdq($tmp2$$XMMRegister, $src1$$Register); 5116 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5117 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5118 %} 5119 ins_pipe( pipe_slow ); 5120 %} 5121 5122 instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ 5123 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5124 match(Set dst (MulReductionVL src1 src2)); 5125 effect(TEMP tmp, TEMP tmp2); 5126 format %{ "vextracti64x2 $tmp,$src2, 0x1\n\t" 5127 "vpmullq $tmp2,$tmp,$src2\n\t" 5128 "pshufd $tmp,$tmp2,0xE\n\t" 5129 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5130 "movdq $tmp,$src1\n\t" 5131 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5132 "movdq $dst,$tmp2\t! mul reduction4L" %} 5133 ins_encode %{ 5134 __ vextracti64x2h($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); 5135 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 5136 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5137 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5138 __ movdq($tmp$$XMMRegister, $src1$$Register); 5139 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5140 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5141 %} 5142 ins_pipe( pipe_slow ); 5143 %} 5144 5145 instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ 5146 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5147 match(Set dst (MulReductionVL src1 src2)); 5148 effect(TEMP tmp, TEMP tmp2); 5149 format %{ "vextracti64x4 $tmp2,$src2\n\t" 5150 "vpmullq $tmp2,$tmp2,$src2\n\t" 5151 "vextracti128 $tmp,$tmp2\n\t" 5152 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5153 "pshufd $tmp,$tmp2,0xE\n\t" 5154 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5155 "movdq $tmp,$src1\n\t" 5156 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5157 "movdq $dst,$tmp2\t! 
mul reduction8L" %} 5158 ins_encode %{ 5159 __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister); 5160 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 5161 __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister); 5162 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5163 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5164 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5165 __ movdq($tmp$$XMMRegister, $src1$$Register); 5166 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5167 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5168 %} 5169 ins_pipe( pipe_slow ); 5170 %} 5171 #endif 5172 5173 instruct rsmul2F_reduction(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{ 5174 predicate(UseSSE >= 1 && UseAVX == 0); 5175 match(Set dst (MulReductionVF src1 src2)); 5176 effect(TEMP tmp, TEMP tmp2); 5177 format %{ "movdqu $tmp,$src1\n\t" 5178 "mulss $tmp,$src2\n\t" 5179 "pshufd $tmp2,$src2,0x01\n\t" 5180 "mulss $tmp,$tmp2\n\t" 5181 "movdqu $dst,$tmp\t! mul reduction2F" %} 5182 ins_encode %{ 5183 __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); 5184 __ mulss($tmp$$XMMRegister, $src2$$XMMRegister); 5185 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01); 5186 __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister); 5187 __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister); 5188 %} 5189 ins_pipe( pipe_slow ); 5190 %} 5191 5192 instruct rvmul2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{ 5193 predicate(UseAVX > 0); 5194 match(Set dst (MulReductionVF src1 src2)); 5195 effect(TEMP tmp, TEMP tmp2); 5196 format %{ "vmulss $tmp2,$src1,$src2\n\t" 5197 "pshufd $tmp,$src2,0x01\n\t" 5198 "vmulss $dst,$tmp2,$tmp\t! mul reduction2F" %} 5199 ins_encode %{ 5200 __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 5201 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5202 __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5203 %} 5204 ins_pipe( pipe_slow ); 5205 %} 5206 5207 instruct rsmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{ 5208 predicate(UseSSE >= 1 && UseAVX == 0); 5209 match(Set dst (MulReductionVF src1 src2)); 5210 effect(TEMP tmp, TEMP tmp2); 5211 format %{ "movdqu $tmp,$src1\n\t" 5212 "mulss $tmp,$src2\n\t" 5213 "pshufd $tmp2,$src2,0x01\n\t" 5214 "mulss $tmp,$tmp2\n\t" 5215 "pshufd $tmp2,$src2,0x02\n\t" 5216 "mulss $tmp,$tmp2\n\t" 5217 "pshufd $tmp2,$src2,0x03\n\t" 5218 "mulss $tmp,$tmp2\n\t" 5219 "movdqu $dst,$tmp\t! 
mul reduction4F" %} 5220 ins_encode %{ 5221 __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); 5222 __ mulss($tmp$$XMMRegister, $src2$$XMMRegister); 5223 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01); 5224 __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister); 5225 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02); 5226 __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister); 5227 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03); 5228 __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister); 5229 __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister); 5230 %} 5231 ins_pipe( pipe_slow ); 5232 %} 5233 5234 instruct rvmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{ 5235 predicate(UseAVX > 0); 5236 match(Set dst (MulReductionVF src1 src2)); 5237 effect(TEMP tmp, TEMP tmp2); 5238 format %{ "vmulss $tmp2,$src1,$src2\n\t" 5239 "pshufd $tmp,$src2,0x01\n\t" 5240 "vmulss $tmp2,$tmp2,$tmp\n\t" 5241 "pshufd $tmp,$src2,0x02\n\t" 5242 "vmulss $tmp2,$tmp2,$tmp\n\t" 5243 "pshufd $tmp,$src2,0x03\n\t" 5244 "vmulss $dst,$tmp2,$tmp\t! mul reduction4F" %} 5245 ins_encode %{ 5246 __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 5247 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5248 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5249 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5250 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5251 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5252 __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5253 %} 5254 ins_pipe( pipe_slow ); 5255 %} 5256 5257 instruct rvmul8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{ 5258 predicate(UseAVX > 0); 5259 match(Set dst (MulReductionVF src1 src2)); 5260 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5261 format %{ "vmulss $tmp2,$src1,$src2\n\t" 5262 "pshufd $tmp,$src2,0x01\n\t" 5263 "vmulss $tmp2,$tmp2,$tmp\n\t" 5264 "pshufd $tmp,$src2,0x02\n\t" 5265 "vmulss $tmp2,$tmp2,$tmp\n\t" 5266 "pshufd $tmp,$src2,0x03\n\t" 5267 "vmulss $tmp2,$tmp2,$tmp\n\t" 5268 "vextractf128 $tmp3,$src2\n\t" 5269 "vmulss $tmp2,$tmp2,$tmp3\n\t" 5270 "pshufd $tmp,$tmp3,0x01\n\t" 5271 "vmulss $tmp2,$tmp2,$tmp\n\t" 5272 "pshufd $tmp,$tmp3,0x02\n\t" 5273 "vmulss $tmp2,$tmp2,$tmp\n\t" 5274 "pshufd $tmp,$tmp3,0x03\n\t" 5275 "vmulss $dst,$tmp2,$tmp\t! 
mul reduction8F" %} 5276 ins_encode %{ 5277 __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 5278 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5279 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5280 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5281 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5282 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5283 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5284 __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister); 5285 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 5286 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); 5287 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5288 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); 5289 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5290 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); 5291 __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5292 %} 5293 ins_pipe( pipe_slow ); 5294 %} 5295 5296 instruct rvmul16F_reduction_reg(regF dst, regF src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 5297 predicate(UseAVX > 2); 5298 match(Set dst (MulReductionVF src1 src2)); 5299 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5300 format %{ "vmulss $tmp2,$src1,$src2\n\t" 5301 "pshufd $tmp,$src2,0x01\n\t" 5302 "vmulss $tmp2,$tmp2,$tmp\n\t" 5303 "pshufd $tmp,$src2,0x02\n\t" 5304 "vmulss $tmp2,$tmp2,$tmp\n\t" 5305 "pshufd $tmp,$src2,0x03\n\t" 5306 "vmulss $tmp2,$tmp2,$tmp\n\t" 5307 "vextractf32x4 $tmp3,$src2, 0x1\n\t" 5308 "vmulss $tmp2,$tmp2,$tmp3\n\t" 5309 "pshufd $tmp,$tmp3,0x01\n\t" 5310 "vmulss $tmp2,$tmp2,$tmp\n\t" 5311 "pshufd $tmp,$tmp3,0x02\n\t" 5312 "vmulss $tmp2,$tmp2,$tmp\n\t" 5313 "pshufd $tmp,$tmp3,0x03\n\t" 5314 "vmulss $tmp2,$tmp2,$tmp\n\t" 5315 "vextractf32x4 $tmp3,$src2, 0x2\n\t" 5316 "vmulss $tmp2,$tmp2,$tmp3\n\t" 5317 "pshufd $tmp,$tmp3,0x01\n\t" 5318 "vmulss $tmp2,$tmp2,$tmp\n\t" 5319 "pshufd $tmp,$tmp3,0x02\n\t" 5320 "vmulss $tmp2,$tmp2,$tmp\n\t" 5321 "pshufd $tmp,$tmp3,0x03\n\t" 5322 "vmulss $tmp2,$tmp2,$tmp\n\t" 5323 "vextractf32x4 $tmp3,$src2, 0x3\n\t" 5324 "vmulss $tmp2,$tmp2,$tmp3\n\t" 5325 "pshufd $tmp,$tmp3,0x01\n\t" 5326 "vmulss $tmp2,$tmp2,$tmp\n\t" 5327 "pshufd $tmp,$tmp3,0x02\n\t" 5328 "vmulss $tmp2,$tmp2,$tmp\n\t" 5329 "pshufd $tmp,$tmp3,0x03\n\t" 5330 "vmulss $dst,$tmp2,$tmp\t! 
mul reduction16F" %} 5331 ins_encode %{ 5332 __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 5333 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5334 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5335 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5336 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5337 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5338 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5339 __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1); 5340 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 5341 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); 5342 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5343 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); 5344 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5345 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); 5346 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5347 __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2); 5348 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 5349 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); 5350 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5351 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); 5352 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5353 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); 5354 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5355 __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3); 5356 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 5357 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); 5358 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5359 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); 5360 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5361 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); 5362 __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5363 %} 5364 ins_pipe( pipe_slow ); 5365 %} 5366 5367 instruct rsmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{ 5368 predicate(UseSSE >= 1 && UseAVX == 0); 5369 match(Set dst (MulReductionVD src1 src2)); 5370 effect(TEMP tmp, TEMP dst); 5371 format %{ "movdqu $tmp,$src1\n\t" 5372 "mulsd $tmp,$src2\n\t" 5373 "pshufd $dst,$src2,0xE\n\t" 5374 "mulsd $dst,$tmp\t! mul reduction2D" %} 5375 ins_encode %{ 5376 __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); 5377 __ mulsd($tmp$$XMMRegister, $src2$$XMMRegister); 5378 __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE); 5379 __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister); 5380 %} 5381 ins_pipe( pipe_slow ); 5382 %} 5383 5384 instruct rvmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{ 5385 predicate(UseAVX > 0); 5386 match(Set dst (MulReductionVD src1 src2)); 5387 effect(TEMP tmp, TEMP tmp2); 5388 format %{ "vmulsd $tmp2,$src1,$src2\n\t" 5389 "pshufd $tmp,$src2,0xE\n\t" 5390 "vmulsd $dst,$tmp2,$tmp\t! 
mul reduction2D" %} 5391 ins_encode %{ 5392 __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 5393 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5394 __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5395 %} 5396 ins_pipe( pipe_slow ); 5397 %} 5398 5399 instruct rvmul4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{ 5400 predicate(UseAVX > 0); 5401 match(Set dst (MulReductionVD src1 src2)); 5402 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5403 format %{ "vmulsd $tmp2,$src1,$src2\n\t" 5404 "pshufd $tmp,$src2,0xE\n\t" 5405 "vmulsd $tmp2,$tmp2,$tmp\n\t" 5406 "vextractf128 $tmp3,$src2\n\t" 5407 "vmulsd $tmp2,$tmp2,$tmp3\n\t" 5408 "pshufd $tmp,$tmp3,0xE\n\t" 5409 "vmulsd $dst,$tmp2,$tmp\t! mul reduction4D" %} 5410 ins_encode %{ 5411 __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 5412 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5413 __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5414 __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister); 5415 __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 5416 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); 5417 __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5418 %} 5419 ins_pipe( pipe_slow ); 5420 %} 5421 5422 instruct rvmul8D_reduction_reg(regD dst, regD src1, vecZ src2, regD tmp, regD tmp2, regD tmp3) %{ 5423 predicate(UseAVX > 2); 5424 match(Set dst (MulReductionVD src1 src2)); 5425 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5426 format %{ "vmulsd $tmp2,$src1,$src2\n\t" 5427 "pshufd $tmp,$src2,0xE\n\t" 5428 "vmulsd $tmp2,$tmp2,$tmp\n\t" 5429 "vextractf64x2 $tmp3,$src2, 0x1\n\t" 5430 "vmulsd $tmp2,$tmp2,$tmp3\n\t" 5431 "pshufd $tmp,$src2,0xE\n\t" 5432 "vmulsd $tmp2,$tmp2,$tmp\n\t" 5433 "vextractf64x2 $tmp3,$src2, 0x2\n\t" 5434 "vmulsd $tmp2,$tmp2,$tmp3\n\t" 5435 "pshufd $tmp,$tmp3,0xE\n\t" 5436 "vmulsd $tmp2,$tmp2,$tmp\n\t" 5437 "vextractf64x2 $tmp3,$src2, 0x3\n\t" 5438 "vmulsd $tmp2,$tmp2,$tmp3\n\t" 5439 "pshufd $tmp,$tmp3,0xE\n\t" 5440 "vmulsd $dst,$tmp2,$tmp\t! 
mul reduction8D" %} 5441 ins_encode %{ 5442 __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 5443 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5444 __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5445 __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1); 5446 __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 5447 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); 5448 __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5449 __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2); 5450 __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 5451 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); 5452 __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5453 __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3); 5454 __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 5455 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); 5456 __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5457 %} 5458 ins_pipe( pipe_slow ); 5459 %} 5460 5461 // ====================VECTOR ARITHMETIC======================================= 5462 5463 // --------------------------------- ADD -------------------------------------- 5464 5465 // Bytes vector add 5466 instruct vadd4B(vecS dst, vecS src) %{ 5467 predicate(n->as_Vector()->length() == 4); 5468 match(Set dst (AddVB dst src)); 5469 format %{ "paddb $dst,$src\t! add packed4B" %} 5470 ins_encode %{ 5471 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5472 %} 5473 ins_pipe( pipe_slow ); 5474 %} 5475 5476 instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{ 5477 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5478 match(Set dst (AddVB src1 src2)); 5479 format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %} 5480 ins_encode %{ 5481 int vector_len = 0; 5482 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5483 %} 5484 ins_pipe( pipe_slow ); 5485 %} 5486 5487 instruct vadd4B_mem(vecS dst, vecS src, memory mem) %{ 5488 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5489 match(Set dst (AddVB src (LoadVector mem))); 5490 format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %} 5491 ins_encode %{ 5492 int vector_len = 0; 5493 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5494 %} 5495 ins_pipe( pipe_slow ); 5496 %} 5497 5498 instruct vadd8B(vecD dst, vecD src) %{ 5499 predicate(n->as_Vector()->length() == 8); 5500 match(Set dst (AddVB dst src)); 5501 format %{ "paddb $dst,$src\t! add packed8B" %} 5502 ins_encode %{ 5503 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5504 %} 5505 ins_pipe( pipe_slow ); 5506 %} 5507 5508 instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{ 5509 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 5510 match(Set dst (AddVB src1 src2)); 5511 format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %} 5512 ins_encode %{ 5513 int vector_len = 0; 5514 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5515 %} 5516 ins_pipe( pipe_slow ); 5517 %} 5518 5519 instruct vadd8B_mem(vecD dst, vecD src, memory mem) %{ 5520 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 5521 match(Set dst (AddVB src (LoadVector mem))); 5522 format %{ "vpaddb $dst,$src,$mem\t! 
add packed8B" %} 5523 ins_encode %{ 5524 int vector_len = 0; 5525 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5526 %} 5527 ins_pipe( pipe_slow ); 5528 %} 5529 5530 instruct vadd16B(vecX dst, vecX src) %{ 5531 predicate(n->as_Vector()->length() == 16); 5532 match(Set dst (AddVB dst src)); 5533 format %{ "paddb $dst,$src\t! add packed16B" %} 5534 ins_encode %{ 5535 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5536 %} 5537 ins_pipe( pipe_slow ); 5538 %} 5539 5540 instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{ 5541 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 5542 match(Set dst (AddVB src1 src2)); 5543 format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %} 5544 ins_encode %{ 5545 int vector_len = 0; 5546 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5547 %} 5548 ins_pipe( pipe_slow ); 5549 %} 5550 5551 instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{ 5552 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 5553 match(Set dst (AddVB src (LoadVector mem))); 5554 format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %} 5555 ins_encode %{ 5556 int vector_len = 0; 5557 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5558 %} 5559 ins_pipe( pipe_slow ); 5560 %} 5561 5562 instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{ 5563 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 5564 match(Set dst (AddVB src1 src2)); 5565 format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %} 5566 ins_encode %{ 5567 int vector_len = 1; 5568 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5569 %} 5570 ins_pipe( pipe_slow ); 5571 %} 5572 5573 instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{ 5574 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 5575 match(Set dst (AddVB src (LoadVector mem))); 5576 format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %} 5577 ins_encode %{ 5578 int vector_len = 1; 5579 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5580 %} 5581 ins_pipe( pipe_slow ); 5582 %} 5583 5584 instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ 5585 predicate(UseAVX > 2 && n->as_Vector()->length() == 64); 5586 match(Set dst (AddVB src1 src2)); 5587 format %{ "vpaddb $dst,$src1,$src2\t! add packed64B" %} 5588 ins_encode %{ 5589 int vector_len = 2; 5590 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5591 %} 5592 ins_pipe( pipe_slow ); 5593 %} 5594 5595 instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{ 5596 predicate(UseAVX > 2 && n->as_Vector()->length() == 64); 5597 match(Set dst (AddVB src (LoadVector mem))); 5598 format %{ "vpaddb $dst,$src,$mem\t! add packed64B" %} 5599 ins_encode %{ 5600 int vector_len = 2; 5601 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5602 %} 5603 ins_pipe( pipe_slow ); 5604 %} 5605 5606 // Shorts/Chars vector add 5607 instruct vadd2S(vecS dst, vecS src) %{ 5608 predicate(n->as_Vector()->length() == 2); 5609 match(Set dst (AddVS dst src)); 5610 format %{ "paddw $dst,$src\t! add packed2S" %} 5611 ins_encode %{ 5612 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5613 %} 5614 ins_pipe( pipe_slow ); 5615 %} 5616 5617 instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{ 5618 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5619 match(Set dst (AddVS src1 src2)); 5620 format %{ "vpaddw $dst,$src1,$src2\t! 
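
// Every operand size above follows the same three-rule shape: a
// two-operand SSE form (dst = dst + src), a three-operand AVX register
// form, and an AVX form with the second operand folded from memory.  The
// vector_len value handed to the assembler selects the encoded width:
// 0 = 128-bit (xmm), 1 = 256-bit (ymm), 2 = 512-bit (zmm), matching the
// UseAVX level demanded by each predicate.
//
// For illustration only (a sketch, not part of the matcher rules):
//
//   static void add(byte[] a, byte[] b) {
//     for (int i = 0; i < a.length; i++) {
//       a[i] += b[i];            // superword turns this into AddVB
//     }
//   }
//
// matches paddb on plain SSE, vpaddb ymm when UseAVX > 1, and vpaddb zmm
// when UseAVX > 2, as the predicates above require.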

// Shorts/Chars vector add
instruct vadd2S(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed2S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed4S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed8S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
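
// AddVS serves both short[] and char[] loops: packed 16-bit addition
// wraps identically under the signed and unsigned interpretation, so one
// set of paddw/vpaddw rules covers both element types.
//
// For illustration only (a sketch, not part of the matcher rules):
//
//   static void inc(char[] s) {
//     for (int i = 0; i < s.length; i++) {
//       s[i]++;                  // same AddVS nodes as a short[] loop
//     }
//   }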

// Integers vector add
instruct vadd2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed2I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed4I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector add
instruct vadd2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL dst src));
  format %{ "paddq $dst,$src\t! add packed2L" %}
  ins_encode %{
    __ paddq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector add
instruct vadd2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed2F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed4F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector add
instruct vadd2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD dst src));
  format %{ "addpd $dst,$src\t! add packed2D" %}
  ins_encode %{
    __ addpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
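
// Note the predicate asymmetry in the rules above: the 256-bit float and
// double adds (vadd8F_reg, vadd4D_reg) only demand UseAVX > 0, because
// AVX1 already provides 256-bit vaddps/vaddpd, while the 256-bit integer
// adds need UseAVX > 1 since the ymm forms of vpaddb/vpaddw/vpaddd/vpaddq
// arrived with AVX2.
//
// For illustration only (a sketch, not part of the matcher rules):
//
//   static void add(float[] a, float[] b) {
//     for (int i = 0; i < a.length; i++) {
//       a[i] += b[i];            // AddVF, eight lanes on AVX1 hardware
//     }
//   }
//
// vectorizes to 8-lane vaddps on an AVX1-only CPU, where the equivalent
// int[] loop is limited to 128-bit vpaddd.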

// --------------------------------- SUB --------------------------------------

// Bytes vector sub
instruct vsub4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed4B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed8B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed16B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector sub
instruct vsub2S(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed2S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed4S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed8S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector sub
instruct vsub2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed2I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed4I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector sub
instruct vsub2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL dst src));
  format %{ "psubq $dst,$src\t! sub packed2L" %}
  ins_encode %{
    __ psubq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector sub
instruct vsub2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed2F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed4F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector sub
instruct vsub2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVD dst src));
  format %{ "subpd $dst,$src\t! sub packed2D" %}
  ins_encode %{
    __ subpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- MUL --------------------------------------

// Shorts/Chars vector mul
instruct vmul2S(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed2S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed4S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed8S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
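
// pmullw/vpmullw keep only the low 16 bits of each product, which is
// exactly the truncation Java performs when a short (or char) multiply
// is narrowed back to 16 bits, so no widening or fix-up is needed.
//
// For illustration only (a sketch, not part of the matcher rules):
//
//   static void scale(short[] a, short k) {
//     for (int i = 0; i < a.length; i++) {
//       a[i] = (short)(a[i] * k);  // MulVS: low 16 bits of the product
//     }
//   }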

// Integers vector mul (sse4_1)
instruct vmul2I(vecD dst, vecD src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed2I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I(vecX dst, vecX src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed4I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
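
// The "(sse4_1)" note above is load-bearing: packed 32-bit multiply low
// (pmulld) was introduced with SSE4.1, hence the UseSSE > 3 predicate on
// the two-operand forms.  There is deliberately no plain-SSE2 rule here,
// so int loops only vectorize their multiplies on SSE4.1-capable CPUs.
//
// For illustration only (a sketch, not part of the matcher rules):
//
//   static void square(int[] a) {
//     for (int i = 0; i < a.length; i++) {
//       a[i] *= a[i];            // MulVI, needs UseSSE > 3 or AVX
//     }
//   }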

instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
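
// Packed 64-bit multiply low (vpmullq) is an AVX-512DQ instruction, so
// every MulVL rule above additionally checks
// VM_Version::supports_avx512dq(); without that feature, long multiplies
// stay scalar even when UseAVX > 2.
//
// For illustration only (a sketch, not part of the matcher rules):
//
//   static void scale(long[] a, long k) {
//     for (int i = 0; i < a.length; i++) {
//       a[i] *= k;               // MulVL, only on AVX-512DQ hardware
//     }
//   }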

instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector mul
instruct vmul2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed2F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed4F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vmul16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector mul
instruct vmul2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD dst src));
  format %{ "mulpd $dst,$src\t! mul packed2D" %}
  ins_encode %{
    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
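// Note on conventions used throughout these patterns: the vector_len
// argument passed to the MacroAssembler selects the operand width
// (0 = 128-bit XMM, 1 = 256-bit YMM, 2 = 512-bit ZMM), matching the
// vecX/vecY/vecZ operand classes. SSE rules are destructive two-operand
// matches, e.g. (MulVF dst src), while AVX rules are non-destructive
// three-operand matches (src1, src2). Illustrative sketch only (not an
// instruct from this file), using the same MacroAssembler entry points
// as the encodings above; the register choices are arbitrary:
//   int vector_len = 1;                       // 256-bit YMM operation
//   __ vmulps(xmm0, xmm1, xmm2, vector_len);  // xmm0 = xmm1 * xmm2 (packed F)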
// --------------------------------- DIV --------------------------------------

// Floats vector div
instruct vdiv2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed2F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed4F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vdiv16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector div
instruct vdiv2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD dst src));
  format %{ "divpd $dst,$src\t! div packed2D" %}
  ins_encode %{
    __ divpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ Shift ---------------------------------------

// Left and right shift count vectors are the same on x86
// (only the lowest bits of the xmm reg are used for the count).
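// A minimal sketch of what the rule below emits, assuming a count already
// held in a general register (the rcx/xmm4 choices here are arbitrary):
//   __ movdl(xmm4, rcx);    // load shift count into low 32 bits of xmm4
//   __ psllw(xmm1, xmm4);   // later: every element shifted by that count
// Since only the low bits of the count register matter, this one encoding
// can serve both LShiftCntV and RShiftCntV.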
instruct vshiftcnt(vecS dst, rRegI cnt) %{
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "movd $dst,$cnt\t! load shift count" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $cnt$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Sqrt --------------------------------------

// Floating point vector sqrt - double precision only
instruct vsqrt2D_reg(vecX dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt2D_mem(vecX dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4D_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4D_mem(vecY dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8D_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8D_mem(vecZ dst, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ LeftShift -----------------------------------
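// Each shift below comes in up to four flavors: an SSE in-place form with
// the count in an xmm register (vecS shift), an SSE form with the count as
// an immI8 folded into the opcode, and AVX three-operand variants of both.
// The immediate forms avoid materializing the count in a register at all.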
// Shorts/Chars vector left shift
instruct vsll2S(vecS dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_imm(vecS dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector left shift
instruct vsll2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector left shift
instruct vsll2L(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8L_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
// ----------------------- LogicalRightShift -----------------------------------

// Shorts vector logical right shift produces an incorrect Java result for
// negative data, because Java code converts short values into ints with
// sign extension before a shift. But char vectors are fine, since chars
// are unsigned values.
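// Worked example (illustration only): for a Java short s = -4 (0xFFFC),
// the scalar semantics of s >>> 1 operate on the sign-extended int
// 0xFFFFFFFC, giving 0x7FFFFFFE, whose low 16 bits are 0xFFFE (-2 as a
// short). A packed psrlw on 0xFFFC would instead produce 0x7FFE (32766),
// so the vector form cannot be used for shorts; chars are zero-extended,
// so the two results agree.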
instruct vsrl2S(vecS dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_imm(vecS dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector logical right shift
instruct vsrl2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector logical right shift
instruct vsrl2L(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8L_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------- ArithmeticRightShift -----------------------------------

// Shorts/Chars vector arithmetic right shift
instruct vsra2S(vecS dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_imm(vecS dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector arithmetic right shift
instruct vsra2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// There are no longs vector arithmetic right shift instructions.
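// (AVX-512 defines an EVEX-encoded vpsraq that could back such a rule on
// newer hardware; without it, a packed long arithmetic shift would have to
// be composed from logical shifts plus a sign fix-up, which this file does
// not attempt.)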
// --------------------------------- AND --------------------------------------
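// The bitwise AND/OR/XOR rules below match on the vector's total size in
// bytes (length_in_bytes) rather than on element count, since pand/por/pxor
// and their AVX forms are type-agnostic: one encoding serves packed bytes,
// shorts, ints and longs of the given width.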
instruct vand4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (4 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (8 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (16 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- OR ---------------------------------------

instruct vor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (4 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (8 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (16 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
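
// Illustrative sketch (hypothetical Java, not part of the AD grammar): the
// OrV rules mirror the AndV ones above; the *_mem variants additionally fold
// the LoadVector into vpor, so one operand is read straight from memory.
//
//   static void or(byte[] a, byte[] b, byte[] r) {
//     for (int i = 0; i < r.length; i++) {
//       r[i] = (byte)(a[i] | b[i]);
//     }
//   }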

// --------------------------------- XOR --------------------------------------

instruct vxor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
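
// Illustrative sketch (hypothetical Java, not part of the AD grammar): an
// element-wise '^' loop like the method below is vectorized into XorV nodes
// that these rules match at the widest width the CPU and UseAVX level allow.
//
//   static void xor(byte[] a, byte[] b, byte[] r) {
//     for (int i = 0; i < r.length; i++) {
//       r[i] = (byte)(a[i] ^ b[i]);
//     }
//   }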