//
// Copyright (c) 2011, 2014, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def" name ( register save type, C convention save type,
//                  ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers.  512-bit registers or 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperWord flags).
// For pre EVEX enabled architectures:
//      XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX enabled architectures:
//      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
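//
// Reading a definition (an illustrative gloss of the XMM0 entries below,
// not an additional definition):
//
//   reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
//   reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
//
// Both slots are save-on-call for the allocator and for the C convention,
// are spilled as Op_RegF, carry hardware encoding 0, and map to the first
// two 32-bit VMReg slots of xmm0: a Float occupies XMM0 alone, a Double
// occupies the XMM0/XMM0b pair.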
//
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM31 preserved across function calls
//              XMM0-XMM3 might hold parameters

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

#ifdef _WIN64

reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(15));

reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOE, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOE, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOE, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOE, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOE, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOE, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOE, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOE, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOE, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOE, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOE, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOE, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOE, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOE, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOE, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOE, Op_RegF, 31, xmm31->as_VMReg()->next(15));

#else // _WIN64

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

#ifdef _LP64

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
xmm23->as_VMReg()->next(8)); 924 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9)); 925 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10)); 926 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11)); 927 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12)); 928 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13)); 929 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14)); 930 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15)); 931 932 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()); 933 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1)); 934 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2)); 935 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3)); 936 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4)); 937 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5)); 938 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6)); 939 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7)); 940 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8)); 941 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9)); 942 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10)); 943 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11)); 944 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12)); 945 reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13)); 946 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14)); 947 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15)); 948 949 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()); 950 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1)); 951 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2)); 952 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3)); 953 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4)); 954 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5)); 955 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6)); 956 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7)); 957 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8)); 958 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9)); 959 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10)); 960 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11)); 961 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12)); 962 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13)); 963 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14)); 964 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15)); 965 966 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()); 967 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1)); 968 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2)); 969 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3)); 970 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4)); 971 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5)); 972 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6)); 973 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7)); 974 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8)); 975 reg_def XMM26j( SOC, SOC, Op_RegF, 26, 
xmm26->as_VMReg()->next(9)); 976 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10)); 977 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11)); 978 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12)); 979 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13)); 980 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14)); 981 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15)); 982 983 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()); 984 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1)); 985 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2)); 986 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3)); 987 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4)); 988 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5)); 989 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6)); 990 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7)); 991 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8)); 992 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9)); 993 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10)); 994 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11)); 995 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12)); 996 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13)); 997 reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14)); 998 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15)); 999 1000 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()); 1001 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1)); 1002 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2)); 1003 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3)); 1004 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4)); 1005 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5)); 1006 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6)); 1007 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7)); 1008 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8)); 1009 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9)); 1010 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10)); 1011 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11)); 1012 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12)); 1013 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13)); 1014 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14)); 1015 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15)); 1016 1017 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()); 1018 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1)); 1019 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2)); 1020 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3)); 1021 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4)); 1022 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5)); 1023 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6)); 1024 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7)); 1025 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8)); 1026 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9)); 1027 reg_def XMM29k( SOC, SOC, Op_RegF, 
29, xmm29->as_VMReg()->next(10)); 1028 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11)); 1029 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12)); 1030 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13)); 1031 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14)); 1032 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15)); 1033 1034 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()); 1035 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1)); 1036 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2)); 1037 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3)); 1038 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4)); 1039 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5)); 1040 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6)); 1041 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7)); 1042 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8)); 1043 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9)); 1044 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10)); 1045 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11)); 1046 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12)); 1047 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13)); 1048 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14)); 1049 reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15)); 1050 1051 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()); 1052 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1)); 1053 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2)); 1054 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3)); 1055 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4)); 1056 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5)); 1057 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6)); 1058 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7)); 1059 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8)); 1060 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9)); 1061 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10)); 1062 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11)); 1063 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12)); 1064 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13)); 1065 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14)); 1066 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15)); 1067 1068 #endif // _LP64 1069 1070 #endif // _WIN64 1071 1072 #ifdef _LP64 1073 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad()); 1074 #else 1075 reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad()); 1076 #endif // _LP64 1077 1078 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1079 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1080 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1081 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1082 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, 
XMM4o, XMM4p, 1083 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1084 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1085 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1086 #ifdef _LP64 1087 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1088 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1089 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1090 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1091 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1092 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1093 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1094 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1095 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 1096 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 1097 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 1098 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 1099 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 1100 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 1101 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 1102 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 1103 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 1104 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 1105 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 1106 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 1107 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 1108 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 1109 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1110 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1111 #endif 1112 ); 1113 1114 // flags 
allocation class should be last. 1115 alloc_class chunk2(RFLAGS); 1116 1117 // Singleton class for condition codes 1118 reg_class int_flags(RFLAGS); 1119 1120 // Class for pre evex float registers 1121 reg_class float_reg_legacy(XMM0, 1122 XMM1, 1123 XMM2, 1124 XMM3, 1125 XMM4, 1126 XMM5, 1127 XMM6, 1128 XMM7 1129 #ifdef _LP64 1130 ,XMM8, 1131 XMM9, 1132 XMM10, 1133 XMM11, 1134 XMM12, 1135 XMM13, 1136 XMM14, 1137 XMM15 1138 #endif 1139 ); 1140 1141 // Class for evex float registers 1142 reg_class float_reg_evex(XMM0, 1143 XMM1, 1144 XMM2, 1145 XMM3, 1146 XMM4, 1147 XMM5, 1148 XMM6, 1149 XMM7 1150 #ifdef _LP64 1151 ,XMM8, 1152 XMM9, 1153 XMM10, 1154 XMM11, 1155 XMM12, 1156 XMM13, 1157 XMM14, 1158 XMM15, 1159 XMM16, 1160 XMM17, 1161 XMM18, 1162 XMM19, 1163 XMM20, 1164 XMM21, 1165 XMM22, 1166 XMM23, 1167 XMM24, 1168 XMM25, 1169 XMM26, 1170 XMM27, 1171 XMM28, 1172 XMM29, 1173 XMM30, 1174 XMM31 1175 #endif 1176 ); 1177 1178 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} ); 1179 1180 // Class for pre evex double registers 1181 reg_class double_reg_legacy(XMM0, XMM0b, 1182 XMM1, XMM1b, 1183 XMM2, XMM2b, 1184 XMM3, XMM3b, 1185 XMM4, XMM4b, 1186 XMM5, XMM5b, 1187 XMM6, XMM6b, 1188 XMM7, XMM7b 1189 #ifdef _LP64 1190 ,XMM8, XMM8b, 1191 XMM9, XMM9b, 1192 XMM10, XMM10b, 1193 XMM11, XMM11b, 1194 XMM12, XMM12b, 1195 XMM13, XMM13b, 1196 XMM14, XMM14b, 1197 XMM15, XMM15b 1198 #endif 1199 ); 1200 1201 // Class for evex double registers 1202 reg_class double_reg_evex(XMM0, XMM0b, 1203 XMM1, XMM1b, 1204 XMM2, XMM2b, 1205 XMM3, XMM3b, 1206 XMM4, XMM4b, 1207 XMM5, XMM5b, 1208 XMM6, XMM6b, 1209 XMM7, XMM7b 1210 #ifdef _LP64 1211 ,XMM8, XMM8b, 1212 XMM9, XMM9b, 1213 XMM10, XMM10b, 1214 XMM11, XMM11b, 1215 XMM12, XMM12b, 1216 XMM13, XMM13b, 1217 XMM14, XMM14b, 1218 XMM15, XMM15b, 1219 XMM16, XMM16b, 1220 XMM17, XMM17b, 1221 XMM18, XMM18b, 1222 XMM19, XMM19b, 1223 XMM20, XMM20b, 1224 XMM21, XMM21b, 1225 XMM22, XMM22b, 1226 XMM23, XMM23b, 1227 XMM24, XMM24b, 1228 XMM25, XMM25b, 1229 XMM26, XMM26b, 1230 XMM27, XMM27b, 1231 XMM28, XMM28b, 1232 XMM29, XMM29b, 1233 XMM30, XMM30b, 1234 XMM31, XMM31b 1235 #endif 1236 ); 1237 1238 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} ); 1239 1240 // Class for pre evex 32bit vector registers 1241 reg_class vectors_reg_legacy(XMM0, 1242 XMM1, 1243 XMM2, 1244 XMM3, 1245 XMM4, 1246 XMM5, 1247 XMM6, 1248 XMM7 1249 #ifdef _LP64 1250 ,XMM8, 1251 XMM9, 1252 XMM10, 1253 XMM11, 1254 XMM12, 1255 XMM13, 1256 XMM14, 1257 XMM15 1258 #endif 1259 ); 1260 1261 // Class for evex 32bit vector registers 1262 reg_class vectors_reg_evex(XMM0, 1263 XMM1, 1264 XMM2, 1265 XMM3, 1266 XMM4, 1267 XMM5, 1268 XMM6, 1269 XMM7 1270 #ifdef _LP64 1271 ,XMM8, 1272 XMM9, 1273 XMM10, 1274 XMM11, 1275 XMM12, 1276 XMM13, 1277 XMM14, 1278 XMM15, 1279 XMM16, 1280 XMM17, 1281 XMM18, 1282 XMM19, 1283 XMM20, 1284 XMM21, 1285 XMM22, 1286 XMM23, 1287 XMM24, 1288 XMM25, 1289 XMM26, 1290 XMM27, 1291 XMM28, 1292 XMM29, 1293 XMM30, 1294 XMM31 1295 #endif 1296 ); 1297 1298 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} ); 1299 1300 // Class for all 64bit vector registers 1301 reg_class vectord_reg_legacy(XMM0, XMM0b, 1302 XMM1, XMM1b, 1303 XMM2, XMM2b, 1304 XMM3, XMM3b, 1305 XMM4, XMM4b, 1306 XMM5, XMM5b, 1307 XMM6, XMM6b, 1308 XMM7, XMM7b 1309 #ifdef _LP64 1310 ,XMM8, XMM8b, 1311 XMM9, XMM9b, 1312 XMM10, XMM10b, 1313 XMM11, XMM11b, 1314 XMM12, XMM12b, 1315 XMM13, XMM13b, 1316 
XMM14, XMM14b, 1317 XMM15, XMM15b 1318 #endif 1319 ); 1320 1321 // Class for all 64bit vector registers 1322 reg_class vectord_reg_evex(XMM0, XMM0b, 1323 XMM1, XMM1b, 1324 XMM2, XMM2b, 1325 XMM3, XMM3b, 1326 XMM4, XMM4b, 1327 XMM5, XMM5b, 1328 XMM6, XMM6b, 1329 XMM7, XMM7b 1330 #ifdef _LP64 1331 ,XMM8, XMM8b, 1332 XMM9, XMM9b, 1333 XMM10, XMM10b, 1334 XMM11, XMM11b, 1335 XMM12, XMM12b, 1336 XMM13, XMM13b, 1337 XMM14, XMM14b, 1338 XMM15, XMM15b, 1339 XMM16, XMM16b, 1340 XMM17, XMM17b, 1341 XMM18, XMM18b, 1342 XMM19, XMM19b, 1343 XMM20, XMM20b, 1344 XMM21, XMM21b, 1345 XMM22, XMM22b, 1346 XMM23, XMM23b, 1347 XMM24, XMM24b, 1348 XMM25, XMM25b, 1349 XMM26, XMM26b, 1350 XMM27, XMM27b, 1351 XMM28, XMM28b, 1352 XMM29, XMM29b, 1353 XMM30, XMM30b, 1354 XMM31, XMM31b 1355 #endif 1356 ); 1357 1358 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} ); 1359 1360 // Class for all 128bit vector registers 1361 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, 1362 XMM1, XMM1b, XMM1c, XMM1d, 1363 XMM2, XMM2b, XMM2c, XMM2d, 1364 XMM3, XMM3b, XMM3c, XMM3d, 1365 XMM4, XMM4b, XMM4c, XMM4d, 1366 XMM5, XMM5b, XMM5c, XMM5d, 1367 XMM6, XMM6b, XMM6c, XMM6d, 1368 XMM7, XMM7b, XMM7c, XMM7d 1369 #ifdef _LP64 1370 ,XMM8, XMM8b, XMM8c, XMM8d, 1371 XMM9, XMM9b, XMM9c, XMM9d, 1372 XMM10, XMM10b, XMM10c, XMM10d, 1373 XMM11, XMM11b, XMM11c, XMM11d, 1374 XMM12, XMM12b, XMM12c, XMM12d, 1375 XMM13, XMM13b, XMM13c, XMM13d, 1376 XMM14, XMM14b, XMM14c, XMM14d, 1377 XMM15, XMM15b, XMM15c, XMM15d 1378 #endif 1379 ); 1380 1381 // Class for all 128bit vector registers 1382 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, 1383 XMM1, XMM1b, XMM1c, XMM1d, 1384 XMM2, XMM2b, XMM2c, XMM2d, 1385 XMM3, XMM3b, XMM3c, XMM3d, 1386 XMM4, XMM4b, XMM4c, XMM4d, 1387 XMM5, XMM5b, XMM5c, XMM5d, 1388 XMM6, XMM6b, XMM6c, XMM6d, 1389 XMM7, XMM7b, XMM7c, XMM7d 1390 #ifdef _LP64 1391 ,XMM8, XMM8b, XMM8c, XMM8d, 1392 XMM9, XMM9b, XMM9c, XMM9d, 1393 XMM10, XMM10b, XMM10c, XMM10d, 1394 XMM11, XMM11b, XMM11c, XMM11d, 1395 XMM12, XMM12b, XMM12c, XMM12d, 1396 XMM13, XMM13b, XMM13c, XMM13d, 1397 XMM14, XMM14b, XMM14c, XMM14d, 1398 XMM15, XMM15b, XMM15c, XMM15d, 1399 XMM16, XMM16b, XMM16c, XMM16d, 1400 XMM17, XMM17b, XMM17c, XMM17d, 1401 XMM18, XMM18b, XMM18c, XMM18d, 1402 XMM19, XMM19b, XMM19c, XMM19d, 1403 XMM20, XMM20b, XMM20c, XMM20d, 1404 XMM21, XMM21b, XMM21c, XMM21d, 1405 XMM22, XMM22b, XMM22c, XMM22d, 1406 XMM23, XMM23b, XMM23c, XMM23d, 1407 XMM24, XMM24b, XMM24c, XMM24d, 1408 XMM25, XMM25b, XMM25c, XMM25d, 1409 XMM26, XMM26b, XMM26c, XMM26d, 1410 XMM27, XMM27b, XMM27c, XMM27d, 1411 XMM28, XMM28b, XMM28c, XMM28d, 1412 XMM29, XMM29b, XMM29c, XMM29d, 1413 XMM30, XMM30b, XMM30c, XMM30d, 1414 XMM31, XMM31b, XMM31c, XMM31d 1415 #endif 1416 ); 1417 1418 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} ); 1419 1420 // Class for all 256bit vector registers 1421 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1422 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1423 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1424 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 1425 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1426 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1427 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1428 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1429 #ifdef _LP64 1430 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1431 XMM9, XMM9b, 
XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1432 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1433 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1434 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1435 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1436 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1437 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h 1438 #endif 1439 ); 1440 1441 // Class for all 256bit vector registers 1442 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1443 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1444 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1445 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 1446 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1447 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1448 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1449 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1450 #ifdef _LP64 1451 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1452 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1453 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1454 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1455 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1456 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1457 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1458 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, 1459 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, 1460 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, 1461 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, 1462 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, 1463 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, 1464 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, 1465 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, 1466 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, 1467 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, 1468 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, 1469 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, 1470 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, 1471 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, 1472 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, 1473 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, 1474 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h 1475 #endif 1476 ); 1477 1478 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} ); 1479 1480 // Class for all 512bit vector registers 1481 reg_class vectorz_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1482 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1483 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1484 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1485 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1486 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, 
XMM5o, XMM5p, 1487 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1488 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1489 #ifdef _LP64 1490 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1491 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1492 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1493 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1494 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1495 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1496 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1497 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1498 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 1499 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 1500 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 1501 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 1502 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 1503 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 1504 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 1505 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 1506 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 1507 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 1508 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 1509 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 1510 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 1511 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 1512 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1513 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1514 #endif 1515 ); 1516 1517 %} 1518 1519 1520 //----------SOURCE BLOCK------------------------------------------------------- 1521 // This is a block of 
C++ code which provides values, functions, and 1522 // definitions necessary in the rest of the architecture description 1523 1524 source_hpp %{ 1525 // Header information of the source block. 1526 // Method declarations/definitions which are used outside 1527 // the ad-scope can conveniently be defined here. 1528 // 1529 // To keep related declarations/definitions/uses close together, 1530 // we switch between source %{ }% and source_hpp %{ }% freely as needed. 1531 1532 class NativeJump; 1533 1534 class CallStubImpl { 1535 1536 //-------------------------------------------------------------- 1537 //---< Used for optimization in Compile::shorten_branches >--- 1538 //-------------------------------------------------------------- 1539 1540 public: 1541 // Size of call trampoline stub. 1542 static uint size_call_trampoline() { 1543 return 0; // no call trampolines on this platform 1544 } 1545 1546 // number of relocations needed by a call trampoline stub 1547 static uint reloc_call_trampoline() { 1548 return 0; // no call trampolines on this platform 1549 } 1550 }; 1551 1552 class HandlerImpl { 1553 1554 public: 1555 1556 static int emit_exception_handler(CodeBuffer &cbuf); 1557 static int emit_deopt_handler(CodeBuffer& cbuf); 1558 1559 static uint size_exception_handler() { 1560 // NativeCall instruction size is the same as NativeJump. 1561 // The exception handler starts out as a jump and can be patched to 1562 // a call by deoptimization. (4932387) 1563 // Note that this value is also credited (in output.cpp) to 1564 // the size of the code section. 1565 return NativeJump::instruction_size; 1566 } 1567 1568 #ifdef _LP64 1569 static uint size_deopt_handler() { 1570 // three 5 byte instructions 1571 return 15; 1572 } 1573 #else 1574 static uint size_deopt_handler() { 1575 // NativeCall instruction size is the same as NativeJump. 1576 // The deopt handler starts out as a jump and can be patched to 1577 // a call by deoptimization. (4932387) 1578 // Note that this value is also credited (in output.cpp) to 1579 // the size of the code section. 1580 return 5 + NativeJump::instruction_size; // pushl(); jmp; 1581 } 1582 #endif 1583 }; 1584 1585 %} // end source_hpp 1586 1587 source %{ 1588 1589 // Emit exception handler code. 1590 // Stuff framesize into a register and call a VM stub routine. 1591 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { 1592 1593 // Note that the code buffer's insts_mark is always relative to insts. 1594 // That's why we must use the macroassembler to generate a handler. 1595 MacroAssembler _masm(&cbuf); 1596 address base = __ start_a_stub(size_exception_handler()); 1597 if (base == NULL) { 1598 ciEnv::current()->record_failure("CodeCache is full"); 1599 return 0; // CodeBuffer::expand failed 1600 } 1601 int offset = __ offset(); 1602 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); 1603 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); 1604 __ end_a_stub(); 1605 return offset; 1606 } 1607 1608 // Emit deopt handler code. 1609 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { 1610 1611 // Note that the code buffer's insts_mark is always relative to insts. 1612 // That's why we must use the macroassembler to generate a handler.
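// Rough size sketch for the 64-bit handler emitted below, assuming the
// usual rel32/imm8 encodings:
//   call next                  // 5 bytes: E8 rel32, pushes address of 'next'
//   next: subptr [rsp], imm8   // 5 bytes: REX.W 83 /5 + SIB + imm8, adjust to the_pc
//   jmp <deopt blob unpack>    // 5 bytes: E9 rel32
// This is where size_deopt_handler()'s "three 5 byte instructions" comes from.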
1613 MacroAssembler _masm(&cbuf); 1614 address base = __ start_a_stub(size_deopt_handler()); 1615 if (base == NULL) { 1616 ciEnv::current()->record_failure("CodeCache is full"); 1617 return 0; // CodeBuffer::expand failed 1618 } 1619 int offset = __ offset(); 1620 1621 #ifdef _LP64 1622 address the_pc = (address) __ pc(); 1623 Label next; 1624 // push the value of "the_pc" on the stack without destroying any registers, 1625 // as they all may be live. 1626 1627 // push address of "next" 1628 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32 1629 __ bind(next); 1630 // adjust it so it matches "the_pc" 1631 __ subptr(Address(rsp, 0), __ offset() - offset); 1632 #else 1633 InternalAddress here(__ pc()); 1634 __ pushptr(here.addr()); 1635 #endif 1636 1637 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); 1638 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); 1639 __ end_a_stub(); 1640 return offset; 1641 } 1642 1643 1644 //============================================================================= 1645 1646 // Float masks come from different places depending on platform. 1647 #ifdef _LP64 1648 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 1649 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 1650 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 1651 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 1652 #else 1653 static address float_signmask() { return (address)float_signmask_pool; } 1654 static address float_signflip() { return (address)float_signflip_pool; } 1655 static address double_signmask() { return (address)double_signmask_pool; } 1656 static address double_signflip() { return (address)double_signflip_pool; } 1657 #endif 1658 1659 1660 const bool Matcher::match_rule_supported(int opcode) { 1661 if (!has_match_rule(opcode)) 1662 return false; 1663 1664 bool ret_value = true; 1665 switch (opcode) { 1666 case Op_PopCountI: 1667 case Op_PopCountL: 1668 if (!UsePopCountInstruction) 1669 ret_value = false; 1670 break; 1671 case Op_MulVI: 1672 if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX 1673 ret_value = false; 1674 break; 1675 case Op_MulVL: 1676 case Op_MulReductionVL: 1677 if (VM_Version::supports_avx512dq() == false) 1678 ret_value = false; 1679 break; 1680 case Op_AddReductionVL: 1681 if (UseAVX < 3) // EVEX only: vector connectivity becomes an issue here 1682 ret_value = false; 1683 break; 1684 case Op_AddReductionVI: 1685 if (UseSSE < 3) // requires at least SSE3 1686 ret_value = false; 1687 break; 1688 case Op_MulReductionVI: 1689 if (UseSSE < 4) // requires at least SSE4 1690 ret_value = false; 1691 break; 1692 case Op_AddReductionVF: 1693 case Op_AddReductionVD: 1694 case Op_MulReductionVF: 1695 case Op_MulReductionVD: 1696 if (UseSSE < 1) // requires at least SSE 1697 ret_value = false; 1698 break; 1699 case Op_SqrtVD: 1700 if (UseAVX < 1) // enabled for AVX only 1701 ret_value = false; 1702 break; 1703 case Op_CompareAndSwapL: 1704 #ifdef _LP64 1705 case Op_CompareAndSwapP: 1706 #endif 1707 if (!VM_Version::supports_cx8()) 1708 ret_value = false; 1709 break; 1710 case Op_CMoveVD: 1711 if (UseAVX > 2) 1712 ret_value = false; 1713 break; 1714 } 1715 1716 return ret_value; // By default, match rules are supported.
1717 } 1718 1719 const int Matcher::float_pressure(int default_pressure_threshold) { 1720 int float_pressure_threshold = default_pressure_threshold; 1721 #ifdef _LP64 1722 if (UseAVX > 2) { 1723 // Increase pressure threshold on machines with AVX3 which have 1724 // 2x more XMM registers. 1725 float_pressure_threshold = default_pressure_threshold * 2; 1726 } 1727 #endif 1728 return float_pressure_threshold; 1729 } 1730 1731 // Max vector size in bytes. 0 if not supported. 1732 const int Matcher::vector_width_in_bytes(BasicType bt) { 1733 assert(is_java_primitive(bt), "only primitive type vectors"); 1734 if (UseSSE < 2) return 0; 1735 // SSE2 supports 128bit vectors for all types. 1736 // AVX2 supports 256bit vectors for all types. 1737 // AVX512/EVEX supports 512bit vectors for all types. 1738 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16; 1739 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. 1740 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) 1741 size = (UseAVX > 2) ? 64 : 32; 1742 // Use flag to limit vector size. 1743 size = MIN2(size,(int)MaxVectorSize); 1744 // Minimum 2 values in vector (or 4 for bytes). 1745 switch (bt) { 1746 case T_DOUBLE: 1747 case T_LONG: 1748 if (size < 16) return 0; 1749 break; 1750 case T_FLOAT: 1751 case T_INT: 1752 if (size < 8) return 0; 1753 break; 1754 case T_BOOLEAN: 1755 if (size < 4) return 0; 1756 break; 1757 case T_CHAR: 1758 if (size < 4) return 0; 1759 break; 1760 case T_BYTE: 1761 if (size < 4) return 0; 1762 if ((size > 32) && !VM_Version::supports_avx512bw()) return 0; 1763 break; 1764 case T_SHORT: 1765 if (size < 4) return 0; 1766 if ((size > 16) && !VM_Version::supports_avx512bw()) return 0; 1767 break; 1768 default: 1769 ShouldNotReachHere(); 1770 } 1771 return size; 1772 } 1773 1774 // Limits on vector size (number of elements) loaded into vector. 1775 const int Matcher::max_vector_size(const BasicType bt) { 1776 return vector_width_in_bytes(bt)/type2aelembytes(bt); 1777 } 1778 const int Matcher::min_vector_size(const BasicType bt) { 1779 int max_size = max_vector_size(bt); 1780 // Min size which can be loaded into a vector is 4 bytes. 1781 int size = (type2aelembytes(bt) == 1) ? 4 : 2; 1782 return MIN2(size,max_size); 1783 } 1784 1785 // Vector ideal reg corresponding to specified size in bytes 1786 const int Matcher::vector_ideal_reg(int size) { 1787 assert(MaxVectorSize >= size, ""); 1788 switch(size) { 1789 case 4: return Op_VecS; 1790 case 8: return Op_VecD; 1791 case 16: return Op_VecX; 1792 case 32: return Op_VecY; 1793 case 64: return Op_VecZ; 1794 } 1795 ShouldNotReachHere(); 1796 return 0; 1797 } 1798 1799 // Only the lowest bits of an xmm reg are used for the vector shift count. 1800 const int Matcher::vector_shift_count_ideal_reg(int size) { 1801 return Op_VecS; 1802 } 1803 1804 // x86 supports misaligned vector stores/loads. 1805 const bool Matcher::misaligned_vectors_ok() { 1806 return !AlignVector; // can be changed by flag 1807 } 1808 1809 // x86 AES instructions are compatible with SunJCE expanded 1810 // keys, hence we do not need to pass the original key to stubs 1811 const bool Matcher::pass_original_key_for_aes() { 1812 return false; 1813 } 1814 1815 // Helper methods for MachSpillCopyNode::implementation(). 1816 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 1817 int src_hi, int dst_hi, uint ireg, outputStream* st) { 1818 // In the 64-bit VM, size calculation is very complex, so instructions are 1819 // emitted into a scratch buffer to determine the size.
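// For the 32-bit size calculation below (an illustrative encoding, not
// authoritative): a register-to-register movdqu encodes in 4 bytes, e.g.
//   movdqu xmm1, xmm2  =>  F3 0F 6F CA
// and with UseAVX > 0 the 2-byte VEX prefix replaces the SIMD prefix
// bytes, so the size stays 4 -- see the assert further down.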
1820 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 1821 assert(ireg == Op_VecS || // 32bit vector 1822 (src_lo & 1) == 0 && (src_lo + 1) == src_hi && 1823 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi, 1824 "no non-adjacent vector moves" ); 1825 if (cbuf) { 1826 MacroAssembler _masm(cbuf); 1827 int offset = __ offset(); 1828 switch (ireg) { 1829 case Op_VecS: // copy whole register 1830 case Op_VecD: 1831 case Op_VecX: 1832 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1833 break; 1834 case Op_VecY: 1835 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1836 break; 1837 case Op_VecZ: 1838 __ evmovdqul(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2); 1839 break; 1840 default: 1841 ShouldNotReachHere(); 1842 } 1843 int size = __ offset() - offset; 1844 #ifdef ASSERT 1845 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 1846 assert(!do_size || size == 4, "incorrect size calculation"); 1847 #endif 1848 return size; 1849 #ifndef PRODUCT 1850 } else if (!do_size) { 1851 switch (ireg) { 1852 case Op_VecS: 1853 case Op_VecD: 1854 case Op_VecX: 1855 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 1856 break; 1857 case Op_VecY: 1858 case Op_VecZ: 1859 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 1860 break; 1861 default: 1862 ShouldNotReachHere(); 1863 } 1864 #endif 1865 } 1866 // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix. 1867 return (UseAVX > 2) ? 6 : 4; 1868 } 1869 1870 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, 1871 int stack_offset, int reg, uint ireg, outputStream* st) { 1872 // In the 64-bit VM, size calculation is very complex, so instructions are 1873 // emitted into a scratch buffer to determine the size.
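// The instruction is picked by spill width (the cases handled below):
//   Op_VecS -> movdl     (4 bytes moved)
//   Op_VecD -> movq      (8 bytes)
//   Op_VecX -> movdqu    (16 bytes)
//   Op_VecY -> vmovdqu   (32 bytes)
//   Op_VecZ -> evmovdqul (64 bytes)
// e.g. reloading a 16-byte spill slot: movdqu xmm0, [rsp + stack_offset]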
1874 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 1875 if (cbuf) { 1876 MacroAssembler _masm(cbuf); 1877 int offset = __ offset(); 1878 if (is_load) { 1879 switch (ireg) { 1880 case Op_VecS: 1881 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1882 break; 1883 case Op_VecD: 1884 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1885 break; 1886 case Op_VecX: 1887 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1888 break; 1889 case Op_VecY: 1890 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1891 break; 1892 case Op_VecZ: 1893 __ evmovdqul(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); 1894 break; 1895 default: 1896 ShouldNotReachHere(); 1897 } 1898 } else { // store 1899 switch (ireg) { 1900 case Op_VecS: 1901 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1902 break; 1903 case Op_VecD: 1904 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1905 break; 1906 case Op_VecX: 1907 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1908 break; 1909 case Op_VecY: 1910 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1911 break; 1912 case Op_VecZ: 1913 __ evmovdqul(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); 1914 break; 1915 default: 1916 ShouldNotReachHere(); 1917 } 1918 } 1919 int size = __ offset() - offset; 1920 #ifdef ASSERT 1921 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4); 1922 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 
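// Worked example for the size check below, assuming no AVX-512: with
// stack_offset == 16 the displacement fits in one byte, so offset_size
// is 1 and the expected size is 5 + 1 = 6 bytes, e.g.
//   movdqu xmm0, [rsp + 16]  =>  F3 0F 6F 44 24 10   (illustrative encoding)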
1923 assert(!do_size || size == (5+offset_size), "incorrect size calculation"); 1924 #endif 1925 return size; 1926 #ifndef PRODUCT 1927 } else if (!do_size) { 1928 if (is_load) { 1929 switch (ireg) { 1930 case Op_VecS: 1931 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1932 break; 1933 case Op_VecD: 1934 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1935 break; 1936 case Op_VecX: 1937 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1938 break; 1939 case Op_VecY: 1940 case Op_VecZ: 1941 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1942 break; 1943 default: 1944 ShouldNotReachHere(); 1945 } 1946 } else { // store 1947 switch (ireg) { 1948 case Op_VecS: 1949 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1950 break; 1951 case Op_VecD: 1952 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1953 break; 1954 case Op_VecX: 1955 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1956 break; 1957 case Op_VecY: 1958 case Op_VecZ: 1959 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1960 break; 1961 default: 1962 ShouldNotReachHere(); 1963 } 1964 } 1965 #endif 1966 } 1967 bool is_single_byte = false; 1968 int vec_len = 0; 1969 if ((UseAVX > 2) && (stack_offset != 0)) { 1970 switch (ireg) { 1971 case Op_VecS: 1972 case Op_VecD: 1973 case Op_VecX: 1974 break; 1975 case Op_VecY: 1976 vec_len = 1; 1977 break; 1978 case Op_VecZ: 1979 vec_len = 2; 1980 break; 1981 } 1982 is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, Assembler::EVEX_FVM, Assembler::EVEX_32bit, 0); 1983 } 1984 int offset_size = 0; 1985 int size = 5; 1986 if (UseAVX > 2) { 1987 if ((VM_Version::supports_avx512vl() == false) && (vec_len == 2)) { 1988 offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); 1989 size += 2; // Need an additional two bytes for EVEX encoding 1990 } else if ((VM_Version::supports_avx512vl() == false) && (vec_len < 2)) { 1991 offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4); 1992 } else { 1993 offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); 1994 size += 2; // Need an additional two bytes for EVEX encoding 1995 } 1996 } else { 1997 offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4); 1998 } 1999 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 2000 return size+offset_size; 2001 } 2002 2003 static inline jfloat replicate4_imm(int con, int width) { 2004 // Load a constant of "width" (in bytes) and replicate it to fill 32bit. 2005 assert(width == 1 || width == 2, "only byte or short types here"); 2006 int bit_width = width * 8; 2007 jint val = con; 2008 val &= (1 << bit_width) - 1; // mask off sign bits 2009 while(bit_width < 32) { 2010 val |= (val << bit_width); 2011 bit_width <<= 1; 2012 } 2013 jfloat fval = *((jfloat*) &val); // coerce to float type 2014 return fval; 2015 } 2016 2017 static inline jdouble replicate8_imm(int con, int width) { 2018 // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
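// Worked examples for the replicate helpers (illustrative): with the
// helper above, replicate4_imm(0x0B, 1) widens the byte 0x0B to the
// 32-bit pattern 0x0B0B0B0B; here, replicate8_imm(0x1234, 2) widens the
// short 0x1234 to 0x1234123412341234. Each result is returned as the raw
// jfloat/jdouble bit pattern for use in the constant table.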
2019 assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here"); 2020 int bit_width = width * 8; 2021 jlong val = con; 2022 val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits 2023 while(bit_width < 64) { 2024 val |= (val << bit_width); 2025 bit_width <<= 1; 2026 } 2027 jdouble dval = *((jdouble*) &val); // coerce to double type 2028 return dval; 2029 } 2030 2031 #ifndef PRODUCT 2032 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 2033 st->print("nop \t# %d bytes pad for loops and calls", _count); 2034 } 2035 #endif 2036 2037 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { 2038 MacroAssembler _masm(&cbuf); 2039 __ nop(_count); 2040 } 2041 2042 uint MachNopNode::size(PhaseRegAlloc*) const { 2043 return _count; 2044 } 2045 2046 #ifndef PRODUCT 2047 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 2048 st->print("# breakpoint"); 2049 } 2050 #endif 2051 2052 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { 2053 MacroAssembler _masm(&cbuf); 2054 __ int3(); 2055 } 2056 2057 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 2058 return MachNode::size(ra_); 2059 } 2060 2061 %} 2062 2063 encode %{ 2064 2065 enc_class call_epilog %{ 2066 if (VerifyStackAtCalls) { 2067 // Check that stack depth is unchanged: find majik cookie on stack 2068 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 2069 MacroAssembler _masm(&cbuf); 2070 Label L; 2071 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); 2072 __ jccb(Assembler::equal, L); 2073 // Die if stack mismatch 2074 __ int3(); 2075 __ bind(L); 2076 } 2077 %} 2078 2079 %} 2080 2081 2082 //----------OPERANDS----------------------------------------------------------- 2083 // Operand definitions must precede instruction definitions for correct parsing 2084 // in the ADLC because operands constitute user defined types which are used in 2085 // instruction definitions. 2086 2087 // This operand applies only to EVEX targets, so there is only one version. 2088 operand vecZ() %{ 2089 constraint(ALLOC_IN_RC(vectorz_reg)); 2090 match(VecZ); 2091 2092 format %{ %} 2093 interface(REG_INTER); 2094 %} 2095 2096 // Comparison Code for FP conditional move 2097 operand cmpOp_vcmppd() %{ 2098 match(Bool); 2099 2100 predicate(n->as_Bool()->_test._test != BoolTest::overflow && 2101 n->as_Bool()->_test._test != BoolTest::no_overflow); 2102 format %{ "" %} 2103 interface(COND_INTER) %{ 2104 equal (0x0, "eq"); 2105 less (0x1, "lt"); 2106 less_equal (0x2, "le"); 2107 not_equal (0xC, "ne"); 2108 greater_equal(0xD, "ge"); 2109 greater (0xE, "gt"); 2110 //TODO the adlc cannot compile without the next two lines; the error is: 2111 // x86_64.ad(13987) Syntax Error: :In operand cmpOp_vcmppd: Do not support this encode constant: ' %{ 2112 // equal' for overflow.
2113 overflow (0x20, "o"); // not really supported by the instruction 2114 no_overflow (0x21, "no"); // not really supported by the instruction 2115 %} 2116 %} 2117 2118 2119 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 2120 2121 // ============================================================================ 2122 2123 instruct ShouldNotReachHere() %{ 2124 match(Halt); 2125 format %{ "int3\t# ShouldNotReachHere" %} 2126 ins_encode %{ 2127 __ int3(); 2128 %} 2129 ins_pipe(pipe_slow); 2130 %} 2131 2132 // ============================================================================ 2133 2134 instruct addF_reg(regF dst, regF src) %{ 2135 predicate((UseSSE>=1) && (UseAVX == 0)); 2136 match(Set dst (AddF dst src)); 2137 2138 format %{ "addss $dst, $src" %} 2139 ins_cost(150); 2140 ins_encode %{ 2141 __ addss($dst$$XMMRegister, $src$$XMMRegister); 2142 %} 2143 ins_pipe(pipe_slow); 2144 %} 2145 2146 instruct addF_mem(regF dst, memory src) %{ 2147 predicate((UseSSE>=1) && (UseAVX == 0)); 2148 match(Set dst (AddF dst (LoadF src))); 2149 2150 format %{ "addss $dst, $src" %} 2151 ins_cost(150); 2152 ins_encode %{ 2153 __ addss($dst$$XMMRegister, $src$$Address); 2154 %} 2155 ins_pipe(pipe_slow); 2156 %} 2157 2158 instruct addF_imm(regF dst, immF con) %{ 2159 predicate((UseSSE>=1) && (UseAVX == 0)); 2160 match(Set dst (AddF dst con)); 2161 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2162 ins_cost(150); 2163 ins_encode %{ 2164 __ addss($dst$$XMMRegister, $constantaddress($con)); 2165 %} 2166 ins_pipe(pipe_slow); 2167 %} 2168 2169 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 2170 predicate(UseAVX > 0); 2171 match(Set dst (AddF src1 src2)); 2172 2173 format %{ "vaddss $dst, $src1, $src2" %} 2174 ins_cost(150); 2175 ins_encode %{ 2176 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2177 %} 2178 ins_pipe(pipe_slow); 2179 %} 2180 2181 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 2182 predicate(UseAVX > 0); 2183 match(Set dst (AddF src1 (LoadF src2))); 2184 2185 format %{ "vaddss $dst, $src1, $src2" %} 2186 ins_cost(150); 2187 ins_encode %{ 2188 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2189 %} 2190 ins_pipe(pipe_slow); 2191 %} 2192 2193 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 2194 predicate(UseAVX > 0); 2195 match(Set dst (AddF src con)); 2196 2197 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2198 ins_cost(150); 2199 ins_encode %{ 2200 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2201 %} 2202 ins_pipe(pipe_slow); 2203 %} 2204 2205 instruct addD_reg(regD dst, regD src) %{ 2206 predicate((UseSSE>=2) && (UseAVX == 0)); 2207 match(Set dst (AddD dst src)); 2208 2209 format %{ "addsd $dst, $src" %} 2210 ins_cost(150); 2211 ins_encode %{ 2212 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 2213 %} 2214 ins_pipe(pipe_slow); 2215 %} 2216 2217 instruct addD_mem(regD dst, memory src) %{ 2218 predicate((UseSSE>=2) && (UseAVX == 0)); 2219 match(Set dst (AddD dst (LoadD src))); 2220 2221 format %{ "addsd $dst, $src" %} 2222 ins_cost(150); 2223 ins_encode %{ 2224 __ addsd($dst$$XMMRegister, $src$$Address); 2225 %} 2226 ins_pipe(pipe_slow); 2227 %} 2228 2229 instruct addD_imm(regD dst, immD con) %{ 2230 predicate((UseSSE>=2) && (UseAVX == 0)); 2231 match(Set dst (AddD dst con)); 2232 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2233 ins_cost(150); 
2234 ins_encode %{ 2235 __ addsd($dst$$XMMRegister, $constantaddress($con)); 2236 %} 2237 ins_pipe(pipe_slow); 2238 %} 2239 2240 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 2241 predicate(UseAVX > 0); 2242 match(Set dst (AddD src1 src2)); 2243 2244 format %{ "vaddsd $dst, $src1, $src2" %} 2245 ins_cost(150); 2246 ins_encode %{ 2247 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2248 %} 2249 ins_pipe(pipe_slow); 2250 %} 2251 2252 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 2253 predicate(UseAVX > 0); 2254 match(Set dst (AddD src1 (LoadD src2))); 2255 2256 format %{ "vaddsd $dst, $src1, $src2" %} 2257 ins_cost(150); 2258 ins_encode %{ 2259 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2260 %} 2261 ins_pipe(pipe_slow); 2262 %} 2263 2264 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 2265 predicate(UseAVX > 0); 2266 match(Set dst (AddD src con)); 2267 2268 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2269 ins_cost(150); 2270 ins_encode %{ 2271 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2272 %} 2273 ins_pipe(pipe_slow); 2274 %} 2275 2276 instruct subF_reg(regF dst, regF src) %{ 2277 predicate((UseSSE>=1) && (UseAVX == 0)); 2278 match(Set dst (SubF dst src)); 2279 2280 format %{ "subss $dst, $src" %} 2281 ins_cost(150); 2282 ins_encode %{ 2283 __ subss($dst$$XMMRegister, $src$$XMMRegister); 2284 %} 2285 ins_pipe(pipe_slow); 2286 %} 2287 2288 instruct subF_mem(regF dst, memory src) %{ 2289 predicate((UseSSE>=1) && (UseAVX == 0)); 2290 match(Set dst (SubF dst (LoadF src))); 2291 2292 format %{ "subss $dst, $src" %} 2293 ins_cost(150); 2294 ins_encode %{ 2295 __ subss($dst$$XMMRegister, $src$$Address); 2296 %} 2297 ins_pipe(pipe_slow); 2298 %} 2299 2300 instruct subF_imm(regF dst, immF con) %{ 2301 predicate((UseSSE>=1) && (UseAVX == 0)); 2302 match(Set dst (SubF dst con)); 2303 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2304 ins_cost(150); 2305 ins_encode %{ 2306 __ subss($dst$$XMMRegister, $constantaddress($con)); 2307 %} 2308 ins_pipe(pipe_slow); 2309 %} 2310 2311 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 2312 predicate(UseAVX > 0); 2313 match(Set dst (SubF src1 src2)); 2314 2315 format %{ "vsubss $dst, $src1, $src2" %} 2316 ins_cost(150); 2317 ins_encode %{ 2318 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2319 %} 2320 ins_pipe(pipe_slow); 2321 %} 2322 2323 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 2324 predicate(UseAVX > 0); 2325 match(Set dst (SubF src1 (LoadF src2))); 2326 2327 format %{ "vsubss $dst, $src1, $src2" %} 2328 ins_cost(150); 2329 ins_encode %{ 2330 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2331 %} 2332 ins_pipe(pipe_slow); 2333 %} 2334 2335 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 2336 predicate(UseAVX > 0); 2337 match(Set dst (SubF src con)); 2338 2339 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2340 ins_cost(150); 2341 ins_encode %{ 2342 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2343 %} 2344 ins_pipe(pipe_slow); 2345 %} 2346 2347 instruct subD_reg(regD dst, regD src) %{ 2348 predicate((UseSSE>=2) && (UseAVX == 0)); 2349 match(Set dst (SubD dst src)); 2350 2351 format %{ "subsd $dst, $src" %} 2352 ins_cost(150); 2353 ins_encode %{ 2354 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 2355 %} 2356 
ins_pipe(pipe_slow); 2357 %} 2358 2359 instruct subD_mem(regD dst, memory src) %{ 2360 predicate((UseSSE>=2) && (UseAVX == 0)); 2361 match(Set dst (SubD dst (LoadD src))); 2362 2363 format %{ "subsd $dst, $src" %} 2364 ins_cost(150); 2365 ins_encode %{ 2366 __ subsd($dst$$XMMRegister, $src$$Address); 2367 %} 2368 ins_pipe(pipe_slow); 2369 %} 2370 2371 instruct subD_imm(regD dst, immD con) %{ 2372 predicate((UseSSE>=2) && (UseAVX == 0)); 2373 match(Set dst (SubD dst con)); 2374 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2375 ins_cost(150); 2376 ins_encode %{ 2377 __ subsd($dst$$XMMRegister, $constantaddress($con)); 2378 %} 2379 ins_pipe(pipe_slow); 2380 %} 2381 2382 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 2383 predicate(UseAVX > 0); 2384 match(Set dst (SubD src1 src2)); 2385 2386 format %{ "vsubsd $dst, $src1, $src2" %} 2387 ins_cost(150); 2388 ins_encode %{ 2389 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2390 %} 2391 ins_pipe(pipe_slow); 2392 %} 2393 2394 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 2395 predicate(UseAVX > 0); 2396 match(Set dst (SubD src1 (LoadD src2))); 2397 2398 format %{ "vsubsd $dst, $src1, $src2" %} 2399 ins_cost(150); 2400 ins_encode %{ 2401 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2402 %} 2403 ins_pipe(pipe_slow); 2404 %} 2405 2406 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 2407 predicate(UseAVX > 0); 2408 match(Set dst (SubD src con)); 2409 2410 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2411 ins_cost(150); 2412 ins_encode %{ 2413 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2414 %} 2415 ins_pipe(pipe_slow); 2416 %} 2417 2418 instruct mulF_reg(regF dst, regF src) %{ 2419 predicate((UseSSE>=1) && (UseAVX == 0)); 2420 match(Set dst (MulF dst src)); 2421 2422 format %{ "mulss $dst, $src" %} 2423 ins_cost(150); 2424 ins_encode %{ 2425 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 2426 %} 2427 ins_pipe(pipe_slow); 2428 %} 2429 2430 instruct mulF_mem(regF dst, memory src) %{ 2431 predicate((UseSSE>=1) && (UseAVX == 0)); 2432 match(Set dst (MulF dst (LoadF src))); 2433 2434 format %{ "mulss $dst, $src" %} 2435 ins_cost(150); 2436 ins_encode %{ 2437 __ mulss($dst$$XMMRegister, $src$$Address); 2438 %} 2439 ins_pipe(pipe_slow); 2440 %} 2441 2442 instruct mulF_imm(regF dst, immF con) %{ 2443 predicate((UseSSE>=1) && (UseAVX == 0)); 2444 match(Set dst (MulF dst con)); 2445 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2446 ins_cost(150); 2447 ins_encode %{ 2448 __ mulss($dst$$XMMRegister, $constantaddress($con)); 2449 %} 2450 ins_pipe(pipe_slow); 2451 %} 2452 2453 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 2454 predicate(UseAVX > 0); 2455 match(Set dst (MulF src1 src2)); 2456 2457 format %{ "vmulss $dst, $src1, $src2" %} 2458 ins_cost(150); 2459 ins_encode %{ 2460 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2461 %} 2462 ins_pipe(pipe_slow); 2463 %} 2464 2465 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 2466 predicate(UseAVX > 0); 2467 match(Set dst (MulF src1 (LoadF src2))); 2468 2469 format %{ "vmulss $dst, $src1, $src2" %} 2470 ins_cost(150); 2471 ins_encode %{ 2472 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2473 %} 2474 ins_pipe(pipe_slow); 2475 %} 2476 2477 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 2478 predicate(UseAVX > 0); 
2479 match(Set dst (MulF src con)); 2480 2481 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2482 ins_cost(150); 2483 ins_encode %{ 2484 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2485 %} 2486 ins_pipe(pipe_slow); 2487 %} 2488 2489 instruct mulD_reg(regD dst, regD src) %{ 2490 predicate((UseSSE>=2) && (UseAVX == 0)); 2491 match(Set dst (MulD dst src)); 2492 2493 format %{ "mulsd $dst, $src" %} 2494 ins_cost(150); 2495 ins_encode %{ 2496 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 2497 %} 2498 ins_pipe(pipe_slow); 2499 %} 2500 2501 instruct mulD_mem(regD dst, memory src) %{ 2502 predicate((UseSSE>=2) && (UseAVX == 0)); 2503 match(Set dst (MulD dst (LoadD src))); 2504 2505 format %{ "mulsd $dst, $src" %} 2506 ins_cost(150); 2507 ins_encode %{ 2508 __ mulsd($dst$$XMMRegister, $src$$Address); 2509 %} 2510 ins_pipe(pipe_slow); 2511 %} 2512 2513 instruct mulD_imm(regD dst, immD con) %{ 2514 predicate((UseSSE>=2) && (UseAVX == 0)); 2515 match(Set dst (MulD dst con)); 2516 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2517 ins_cost(150); 2518 ins_encode %{ 2519 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 2520 %} 2521 ins_pipe(pipe_slow); 2522 %} 2523 2524 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 2525 predicate(UseAVX > 0); 2526 match(Set dst (MulD src1 src2)); 2527 2528 format %{ "vmulsd $dst, $src1, $src2" %} 2529 ins_cost(150); 2530 ins_encode %{ 2531 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2532 %} 2533 ins_pipe(pipe_slow); 2534 %} 2535 2536 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 2537 predicate(UseAVX > 0); 2538 match(Set dst (MulD src1 (LoadD src2))); 2539 2540 format %{ "vmulsd $dst, $src1, $src2" %} 2541 ins_cost(150); 2542 ins_encode %{ 2543 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2544 %} 2545 ins_pipe(pipe_slow); 2546 %} 2547 2548 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 2549 predicate(UseAVX > 0); 2550 match(Set dst (MulD src con)); 2551 2552 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2553 ins_cost(150); 2554 ins_encode %{ 2555 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2556 %} 2557 ins_pipe(pipe_slow); 2558 %} 2559 2560 instruct divF_reg(regF dst, regF src) %{ 2561 predicate((UseSSE>=1) && (UseAVX == 0)); 2562 match(Set dst (DivF dst src)); 2563 2564 format %{ "divss $dst, $src" %} 2565 ins_cost(150); 2566 ins_encode %{ 2567 __ divss($dst$$XMMRegister, $src$$XMMRegister); 2568 %} 2569 ins_pipe(pipe_slow); 2570 %} 2571 2572 instruct divF_mem(regF dst, memory src) %{ 2573 predicate((UseSSE>=1) && (UseAVX == 0)); 2574 match(Set dst (DivF dst (LoadF src))); 2575 2576 format %{ "divss $dst, $src" %} 2577 ins_cost(150); 2578 ins_encode %{ 2579 __ divss($dst$$XMMRegister, $src$$Address); 2580 %} 2581 ins_pipe(pipe_slow); 2582 %} 2583 2584 instruct divF_imm(regF dst, immF con) %{ 2585 predicate((UseSSE>=1) && (UseAVX == 0)); 2586 match(Set dst (DivF dst con)); 2587 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2588 ins_cost(150); 2589 ins_encode %{ 2590 __ divss($dst$$XMMRegister, $constantaddress($con)); 2591 %} 2592 ins_pipe(pipe_slow); 2593 %} 2594 2595 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 2596 predicate(UseAVX > 0); 2597 match(Set dst (DivF src1 src2)); 2598 2599 format %{ "vdivss $dst, $src1, $src2" %} 2600 ins_cost(150); 
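// Emits e.g. "vdivss xmm0, xmm1, xmm2"; the concrete registers shown are
// illustrative only, the register allocator supplies the real assignment.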
2601 ins_encode %{ 2602 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2603 %} 2604 ins_pipe(pipe_slow); 2605 %} 2606 2607 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 2608 predicate(UseAVX > 0); 2609 match(Set dst (DivF src1 (LoadF src2))); 2610 2611 format %{ "vdivss $dst, $src1, $src2" %} 2612 ins_cost(150); 2613 ins_encode %{ 2614 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2615 %} 2616 ins_pipe(pipe_slow); 2617 %} 2618 2619 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 2620 predicate(UseAVX > 0); 2621 match(Set dst (DivF src con)); 2622 2623 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2624 ins_cost(150); 2625 ins_encode %{ 2626 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2627 %} 2628 ins_pipe(pipe_slow); 2629 %} 2630 2631 instruct divD_reg(regD dst, regD src) %{ 2632 predicate((UseSSE>=2) && (UseAVX == 0)); 2633 match(Set dst (DivD dst src)); 2634 2635 format %{ "divsd $dst, $src" %} 2636 ins_cost(150); 2637 ins_encode %{ 2638 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 2639 %} 2640 ins_pipe(pipe_slow); 2641 %} 2642 2643 instruct divD_mem(regD dst, memory src) %{ 2644 predicate((UseSSE>=2) && (UseAVX == 0)); 2645 match(Set dst (DivD dst (LoadD src))); 2646 2647 format %{ "divsd $dst, $src" %} 2648 ins_cost(150); 2649 ins_encode %{ 2650 __ divsd($dst$$XMMRegister, $src$$Address); 2651 %} 2652 ins_pipe(pipe_slow); 2653 %} 2654 2655 instruct divD_imm(regD dst, immD con) %{ 2656 predicate((UseSSE>=2) && (UseAVX == 0)); 2657 match(Set dst (DivD dst con)); 2658 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2659 ins_cost(150); 2660 ins_encode %{ 2661 __ divsd($dst$$XMMRegister, $constantaddress($con)); 2662 %} 2663 ins_pipe(pipe_slow); 2664 %} 2665 2666 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 2667 predicate(UseAVX > 0); 2668 match(Set dst (DivD src1 src2)); 2669 2670 format %{ "vdivsd $dst, $src1, $src2" %} 2671 ins_cost(150); 2672 ins_encode %{ 2673 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2674 %} 2675 ins_pipe(pipe_slow); 2676 %} 2677 2678 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 2679 predicate(UseAVX > 0); 2680 match(Set dst (DivD src1 (LoadD src2))); 2681 2682 format %{ "vdivsd $dst, $src1, $src2" %} 2683 ins_cost(150); 2684 ins_encode %{ 2685 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2686 %} 2687 ins_pipe(pipe_slow); 2688 %} 2689 2690 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 2691 predicate(UseAVX > 0); 2692 match(Set dst (DivD src con)); 2693 2694 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2695 ins_cost(150); 2696 ins_encode %{ 2697 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2698 %} 2699 ins_pipe(pipe_slow); 2700 %} 2701 2702 instruct absF_reg(regF dst) %{ 2703 predicate((UseSSE>=1) && (UseAVX == 0)); 2704 match(Set dst (AbsF dst)); 2705 ins_cost(150); 2706 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} 2707 ins_encode %{ 2708 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 2709 %} 2710 ins_pipe(pipe_slow); 2711 %} 2712 2713 instruct absF_reg_reg(regF dst, regF src) %{ 2714 predicate(UseAVX > 0); 2715 match(Set dst (AbsF src)); 2716 ins_cost(150); 2717 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 2718 ins_encode %{ 2719 int vector_len = 0; 2720 __ 
vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AbsD dst));
  ins_cost(150);
  format %{ "andpd $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg_reg(regD dst, regD src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

instruct negF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (NegF dst));
  ins_cost(150);
  format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negF_reg_reg(regF dst, regF src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegF src));
  ins_cost(150);
  format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (NegD dst));
  ins_cost(150);
  format %{ "xorpd $dst, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg_reg(regD dst, regD src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegD src));
  ins_cost(150);
  format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_reg(regF dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_mem(regF dst, memory src) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_imm(regF dst, immF con) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D con))));
  format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_reg(regD dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD src));

  format %{ "sqrtsd $dst, $src" %}
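// sqrtsd yields the correctly rounded IEEE 754 double-precision square root.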
2840 ins_cost(150); 2841 ins_encode %{ 2842 __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister); 2843 %} 2844 ins_pipe(pipe_slow); 2845 %} 2846 2847 instruct sqrtD_mem(regD dst, memory src) %{ 2848 predicate(UseSSE>=2); 2849 match(Set dst (SqrtD (LoadD src))); 2850 2851 format %{ "sqrtsd $dst, $src" %} 2852 ins_cost(150); 2853 ins_encode %{ 2854 __ sqrtsd($dst$$XMMRegister, $src$$Address); 2855 %} 2856 ins_pipe(pipe_slow); 2857 %} 2858 2859 instruct sqrtD_imm(regD dst, immD con) %{ 2860 predicate(UseSSE>=2); 2861 match(Set dst (SqrtD con)); 2862 format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2863 ins_cost(150); 2864 ins_encode %{ 2865 __ sqrtsd($dst$$XMMRegister, $constantaddress($con)); 2866 %} 2867 ins_pipe(pipe_slow); 2868 %} 2869 2870 // ====================VECTOR INSTRUCTIONS===================================== 2871 2872 // Load vectors (4 bytes long) 2873 instruct loadV4(vecS dst, memory mem) %{ 2874 predicate(n->as_LoadVector()->memory_size() == 4); 2875 match(Set dst (LoadVector mem)); 2876 ins_cost(125); 2877 format %{ "movd $dst,$mem\t! load vector (4 bytes)" %} 2878 ins_encode %{ 2879 __ movdl($dst$$XMMRegister, $mem$$Address); 2880 %} 2881 ins_pipe( pipe_slow ); 2882 %} 2883 2884 // Load vectors (8 bytes long) 2885 instruct loadV8(vecD dst, memory mem) %{ 2886 predicate(n->as_LoadVector()->memory_size() == 8); 2887 match(Set dst (LoadVector mem)); 2888 ins_cost(125); 2889 format %{ "movq $dst,$mem\t! load vector (8 bytes)" %} 2890 ins_encode %{ 2891 __ movq($dst$$XMMRegister, $mem$$Address); 2892 %} 2893 ins_pipe( pipe_slow ); 2894 %} 2895 2896 // Load vectors (16 bytes long) 2897 instruct loadV16(vecX dst, memory mem) %{ 2898 predicate(n->as_LoadVector()->memory_size() == 16); 2899 match(Set dst (LoadVector mem)); 2900 ins_cost(125); 2901 format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %} 2902 ins_encode %{ 2903 __ movdqu($dst$$XMMRegister, $mem$$Address); 2904 %} 2905 ins_pipe( pipe_slow ); 2906 %} 2907 2908 // Load vectors (32 bytes long) 2909 instruct loadV32(vecY dst, memory mem) %{ 2910 predicate(n->as_LoadVector()->memory_size() == 32); 2911 match(Set dst (LoadVector mem)); 2912 ins_cost(125); 2913 format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %} 2914 ins_encode %{ 2915 __ vmovdqu($dst$$XMMRegister, $mem$$Address); 2916 %} 2917 ins_pipe( pipe_slow ); 2918 %} 2919 2920 // Load vectors (64 bytes long) 2921 instruct loadV64(vecZ dst, memory mem) %{ 2922 predicate(n->as_LoadVector()->memory_size() == 64); 2923 match(Set dst (LoadVector mem)); 2924 ins_cost(125); 2925 format %{ "vmovdqu $dst k0,$mem\t! load vector (64 bytes)" %} 2926 ins_encode %{ 2927 int vector_len = 2; 2928 __ evmovdqul($dst$$XMMRegister, $mem$$Address, vector_len); 2929 %} 2930 ins_pipe( pipe_slow ); 2931 %} 2932 2933 // Store vectors 2934 instruct storeV4(memory mem, vecS src) %{ 2935 predicate(n->as_StoreVector()->memory_size() == 4); 2936 match(Set mem (StoreVector mem src)); 2937 ins_cost(145); 2938 format %{ "movd $mem,$src\t! store vector (4 bytes)" %} 2939 ins_encode %{ 2940 __ movdl($mem$$Address, $src$$XMMRegister); 2941 %} 2942 ins_pipe( pipe_slow ); 2943 %} 2944 2945 instruct storeV8(memory mem, vecD src) %{ 2946 predicate(n->as_StoreVector()->memory_size() == 8); 2947 match(Set mem (StoreVector mem src)); 2948 ins_cost(145); 2949 format %{ "movq $mem,$src\t! 
store vector (8 bytes)" %} 2950 ins_encode %{ 2951 __ movq($mem$$Address, $src$$XMMRegister); 2952 %} 2953 ins_pipe( pipe_slow ); 2954 %} 2955 2956 instruct storeV16(memory mem, vecX src) %{ 2957 predicate(n->as_StoreVector()->memory_size() == 16); 2958 match(Set mem (StoreVector mem src)); 2959 ins_cost(145); 2960 format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %} 2961 ins_encode %{ 2962 __ movdqu($mem$$Address, $src$$XMMRegister); 2963 %} 2964 ins_pipe( pipe_slow ); 2965 %} 2966 2967 instruct storeV32(memory mem, vecY src) %{ 2968 predicate(n->as_StoreVector()->memory_size() == 32); 2969 match(Set mem (StoreVector mem src)); 2970 ins_cost(145); 2971 format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %} 2972 ins_encode %{ 2973 __ vmovdqu($mem$$Address, $src$$XMMRegister); 2974 %} 2975 ins_pipe( pipe_slow ); 2976 %} 2977 2978 instruct storeV64(memory mem, vecZ src) %{ 2979 predicate(n->as_StoreVector()->memory_size() == 64); 2980 match(Set mem (StoreVector mem src)); 2981 ins_cost(145); 2982 format %{ "vmovdqu $mem k0,$src\t! store vector (64 bytes)" %} 2983 ins_encode %{ 2984 int vector_len = 2; 2985 __ evmovdqul($mem$$Address, $src$$XMMRegister, vector_len); 2986 %} 2987 ins_pipe( pipe_slow ); 2988 %} 2989 2990 // ====================LEGACY REPLICATE======================================= 2991 2992 instruct Repl4B_mem(vecS dst, memory mem) %{ 2993 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 2994 match(Set dst (ReplicateB (LoadB mem))); 2995 format %{ "punpcklbw $dst,$mem\n\t" 2996 "pshuflw $dst,$dst,0x00\t! replicate4B" %} 2997 ins_encode %{ 2998 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 2999 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3000 %} 3001 ins_pipe( pipe_slow ); 3002 %} 3003 3004 instruct Repl8B_mem(vecD dst, memory mem) %{ 3005 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3006 match(Set dst (ReplicateB (LoadB mem))); 3007 format %{ "punpcklbw $dst,$mem\n\t" 3008 "pshuflw $dst,$dst,0x00\t! replicate8B" %} 3009 ins_encode %{ 3010 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3011 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3012 %} 3013 ins_pipe( pipe_slow ); 3014 %} 3015 3016 instruct Repl16B(vecX dst, rRegI src) %{ 3017 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3018 match(Set dst (ReplicateB src)); 3019 format %{ "movd $dst,$src\n\t" 3020 "punpcklbw $dst,$dst\n\t" 3021 "pshuflw $dst,$dst,0x00\n\t" 3022 "punpcklqdq $dst,$dst\t! replicate16B" %} 3023 ins_encode %{ 3024 __ movdl($dst$$XMMRegister, $src$$Register); 3025 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3026 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3027 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3028 %} 3029 ins_pipe( pipe_slow ); 3030 %} 3031 3032 instruct Repl16B_mem(vecX dst, memory mem) %{ 3033 predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3034 match(Set dst (ReplicateB (LoadB mem))); 3035 format %{ "punpcklbw $dst,$mem\n\t" 3036 "pshuflw $dst,$dst,0x00\n\t" 3037 "punpcklqdq $dst,$dst\t! 
replicate16B" %} 3038 ins_encode %{ 3039 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3040 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3041 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3042 %} 3043 ins_pipe( pipe_slow ); 3044 %} 3045 3046 instruct Repl32B(vecY dst, rRegI src) %{ 3047 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3048 match(Set dst (ReplicateB src)); 3049 format %{ "movd $dst,$src\n\t" 3050 "punpcklbw $dst,$dst\n\t" 3051 "pshuflw $dst,$dst,0x00\n\t" 3052 "punpcklqdq $dst,$dst\n\t" 3053 "vinserti128h $dst,$dst,$dst\t! replicate32B" %} 3054 ins_encode %{ 3055 __ movdl($dst$$XMMRegister, $src$$Register); 3056 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3057 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3058 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3059 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3060 %} 3061 ins_pipe( pipe_slow ); 3062 %} 3063 3064 instruct Repl32B_mem(vecY dst, memory mem) %{ 3065 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3066 match(Set dst (ReplicateB (LoadB mem))); 3067 format %{ "punpcklbw $dst,$mem\n\t" 3068 "pshuflw $dst,$dst,0x00\n\t" 3069 "punpcklqdq $dst,$dst\n\t" 3070 "vinserti128h $dst,$dst,$dst\t! replicate32B" %} 3071 ins_encode %{ 3072 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3073 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3074 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3075 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3076 %} 3077 ins_pipe( pipe_slow ); 3078 %} 3079 3080 instruct Repl16B_imm(vecX dst, immI con) %{ 3081 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3082 match(Set dst (ReplicateB con)); 3083 format %{ "movq $dst,[$constantaddress]\n\t" 3084 "punpcklqdq $dst,$dst\t! replicate16B($con)" %} 3085 ins_encode %{ 3086 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3087 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3088 %} 3089 ins_pipe( pipe_slow ); 3090 %} 3091 3092 instruct Repl32B_imm(vecY dst, immI con) %{ 3093 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3094 match(Set dst (ReplicateB con)); 3095 format %{ "movq $dst,[$constantaddress]\n\t" 3096 "punpcklqdq $dst,$dst\n\t" 3097 "vinserti128h $dst,$dst,$dst\t! lreplicate32B($con)" %} 3098 ins_encode %{ 3099 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3100 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3101 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3102 %} 3103 ins_pipe( pipe_slow ); 3104 %} 3105 3106 instruct Repl4S(vecD dst, rRegI src) %{ 3107 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vlbw()); 3108 match(Set dst (ReplicateS src)); 3109 format %{ "movd $dst,$src\n\t" 3110 "pshuflw $dst,$dst,0x00\t! replicate4S" %} 3111 ins_encode %{ 3112 __ movdl($dst$$XMMRegister, $src$$Register); 3113 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3114 %} 3115 ins_pipe( pipe_slow ); 3116 %} 3117 3118 instruct Repl4S_mem(vecD dst, memory mem) %{ 3119 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3120 match(Set dst (ReplicateS (LoadS mem))); 3121 format %{ "pshuflw $dst,$mem,0x00\t! 
replicate4S" %} 3122 ins_encode %{ 3123 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3124 %} 3125 ins_pipe( pipe_slow ); 3126 %} 3127 3128 instruct Repl8S(vecX dst, rRegI src) %{ 3129 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw()); 3130 match(Set dst (ReplicateS src)); 3131 format %{ "movd $dst,$src\n\t" 3132 "pshuflw $dst,$dst,0x00\n\t" 3133 "punpcklqdq $dst,$dst\t! replicate8S" %} 3134 ins_encode %{ 3135 __ movdl($dst$$XMMRegister, $src$$Register); 3136 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3137 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3138 %} 3139 ins_pipe( pipe_slow ); 3140 %} 3141 3142 instruct Repl8S_mem(vecX dst, memory mem) %{ 3143 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3144 match(Set dst (ReplicateS (LoadS mem))); 3145 format %{ "pshuflw $dst,$mem,0x00\n\t" 3146 "punpcklqdq $dst,$dst\t! replicate8S" %} 3147 ins_encode %{ 3148 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3149 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3150 %} 3151 ins_pipe( pipe_slow ); 3152 %} 3153 3154 instruct Repl8S_imm(vecX dst, immI con) %{ 3155 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw()); 3156 match(Set dst (ReplicateS con)); 3157 format %{ "movq $dst,[$constantaddress]\n\t" 3158 "punpcklqdq $dst,$dst\t! replicate8S($con)" %} 3159 ins_encode %{ 3160 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3161 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3162 %} 3163 ins_pipe( pipe_slow ); 3164 %} 3165 3166 instruct Repl16S(vecY dst, rRegI src) %{ 3167 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3168 match(Set dst (ReplicateS src)); 3169 format %{ "movd $dst,$src\n\t" 3170 "pshuflw $dst,$dst,0x00\n\t" 3171 "punpcklqdq $dst,$dst\n\t" 3172 "vinserti128h $dst,$dst,$dst\t! replicate16S" %} 3173 ins_encode %{ 3174 __ movdl($dst$$XMMRegister, $src$$Register); 3175 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3176 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3177 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3178 %} 3179 ins_pipe( pipe_slow ); 3180 %} 3181 3182 instruct Repl16S_mem(vecY dst, memory mem) %{ 3183 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3184 match(Set dst (ReplicateS (LoadS mem))); 3185 format %{ "pshuflw $dst,$mem,0x00\n\t" 3186 "punpcklqdq $dst,$dst\n\t" 3187 "vinserti128h $dst,$dst,$dst\t! replicate16S" %} 3188 ins_encode %{ 3189 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3190 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3191 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3192 %} 3193 ins_pipe( pipe_slow ); 3194 %} 3195 3196 instruct Repl16S_imm(vecY dst, immI con) %{ 3197 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3198 match(Set dst (ReplicateS con)); 3199 format %{ "movq $dst,[$constantaddress]\n\t" 3200 "punpcklqdq $dst,$dst\n\t" 3201 "vinserti128h $dst,$dst,$dst\t! 
replicate16S($con)" %} 3202 ins_encode %{ 3203 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3204 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3205 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3206 %} 3207 ins_pipe( pipe_slow ); 3208 %} 3209 3210 instruct Repl4I(vecX dst, rRegI src) %{ 3211 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3212 match(Set dst (ReplicateI src)); 3213 format %{ "movd $dst,$src\n\t" 3214 "pshufd $dst,$dst,0x00\t! replicate4I" %} 3215 ins_encode %{ 3216 __ movdl($dst$$XMMRegister, $src$$Register); 3217 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3218 %} 3219 ins_pipe( pipe_slow ); 3220 %} 3221 3222 instruct Repl4I_mem(vecX dst, memory mem) %{ 3223 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3224 match(Set dst (ReplicateI (LoadI mem))); 3225 format %{ "pshufd $dst,$mem,0x00\t! replicate4I" %} 3226 ins_encode %{ 3227 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3228 %} 3229 ins_pipe( pipe_slow ); 3230 %} 3231 3232 instruct Repl8I(vecY dst, rRegI src) %{ 3233 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3234 match(Set dst (ReplicateI src)); 3235 format %{ "movd $dst,$src\n\t" 3236 "pshufd $dst,$dst,0x00\n\t" 3237 "vinserti128h $dst,$dst,$dst\t! replicate8I" %} 3238 ins_encode %{ 3239 __ movdl($dst$$XMMRegister, $src$$Register); 3240 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3241 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3242 %} 3243 ins_pipe( pipe_slow ); 3244 %} 3245 3246 instruct Repl8I_mem(vecY dst, memory mem) %{ 3247 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3248 match(Set dst (ReplicateI (LoadI mem))); 3249 format %{ "pshufd $dst,$mem,0x00\n\t" 3250 "vinserti128h $dst,$dst,$dst\t! replicate8I" %} 3251 ins_encode %{ 3252 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3253 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3254 %} 3255 ins_pipe( pipe_slow ); 3256 %} 3257 3258 instruct Repl4I_imm(vecX dst, immI con) %{ 3259 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3260 match(Set dst (ReplicateI con)); 3261 format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t" 3262 "punpcklqdq $dst,$dst" %} 3263 ins_encode %{ 3264 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3265 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3266 %} 3267 ins_pipe( pipe_slow ); 3268 %} 3269 3270 instruct Repl8I_imm(vecY dst, immI con) %{ 3271 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3272 match(Set dst (ReplicateI con)); 3273 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 3274 "punpcklqdq $dst,$dst\n\t" 3275 "vinserti128h $dst,$dst,$dst" %} 3276 ins_encode %{ 3277 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3278 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3279 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3280 %} 3281 ins_pipe( pipe_slow ); 3282 %} 3283 3284 // Long could be loaded into xmm register directly from memory. 3285 instruct Repl2L_mem(vecX dst, memory mem) %{ 3286 predicate(n->as_Vector()->length() == 2 && !VM_Version::supports_avx512vlbw()); 3287 match(Set dst (ReplicateL (LoadL mem))); 3288 format %{ "movq $dst,$mem\n\t" 3289 "punpcklqdq $dst,$dst\t! 
replicate2L" %} 3290 ins_encode %{ 3291 __ movq($dst$$XMMRegister, $mem$$Address); 3292 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3293 %} 3294 ins_pipe( pipe_slow ); 3295 %} 3296 3297 // Replicate long (8 byte) scalar to be vector 3298 #ifdef _LP64 3299 instruct Repl4L(vecY dst, rRegL src) %{ 3300 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3301 match(Set dst (ReplicateL src)); 3302 format %{ "movdq $dst,$src\n\t" 3303 "punpcklqdq $dst,$dst\n\t" 3304 "vinserti128h $dst,$dst,$dst\t! replicate4L" %} 3305 ins_encode %{ 3306 __ movdq($dst$$XMMRegister, $src$$Register); 3307 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3308 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3309 %} 3310 ins_pipe( pipe_slow ); 3311 %} 3312 #else // _LP64 3313 instruct Repl4L(vecY dst, eRegL src, regD tmp) %{ 3314 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3315 match(Set dst (ReplicateL src)); 3316 effect(TEMP dst, USE src, TEMP tmp); 3317 format %{ "movdl $dst,$src.lo\n\t" 3318 "movdl $tmp,$src.hi\n\t" 3319 "punpckldq $dst,$tmp\n\t" 3320 "punpcklqdq $dst,$dst\n\t" 3321 "vinserti128h $dst,$dst,$dst\t! replicate4L" %} 3322 ins_encode %{ 3323 __ movdl($dst$$XMMRegister, $src$$Register); 3324 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3325 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3326 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3327 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3328 %} 3329 ins_pipe( pipe_slow ); 3330 %} 3331 #endif // _LP64 3332 3333 instruct Repl4L_imm(vecY dst, immL con) %{ 3334 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3335 match(Set dst (ReplicateL con)); 3336 format %{ "movq $dst,[$constantaddress]\n\t" 3337 "punpcklqdq $dst,$dst\n\t" 3338 "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %} 3339 ins_encode %{ 3340 __ movq($dst$$XMMRegister, $constantaddress($con)); 3341 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3342 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3343 %} 3344 ins_pipe( pipe_slow ); 3345 %} 3346 3347 instruct Repl4L_mem(vecY dst, memory mem) %{ 3348 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3349 match(Set dst (ReplicateL (LoadL mem))); 3350 format %{ "movq $dst,$mem\n\t" 3351 "punpcklqdq $dst,$dst\n\t" 3352 "vinserti128h $dst,$dst,$dst\t! replicate4L" %} 3353 ins_encode %{ 3354 __ movq($dst$$XMMRegister, $mem$$Address); 3355 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3356 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3357 %} 3358 ins_pipe( pipe_slow ); 3359 %} 3360 3361 instruct Repl2F_mem(vecD dst, memory mem) %{ 3362 predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3363 match(Set dst (ReplicateF (LoadF mem))); 3364 format %{ "pshufd $dst,$mem,0x00\t! replicate2F" %} 3365 ins_encode %{ 3366 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3367 %} 3368 ins_pipe( pipe_slow ); 3369 %} 3370 3371 instruct Repl4F_mem(vecX dst, memory mem) %{ 3372 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3373 match(Set dst (ReplicateF (LoadF mem))); 3374 format %{ "pshufd $dst,$mem,0x00\t! 
replicate4F" %} 3375 ins_encode %{ 3376 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3377 %} 3378 ins_pipe( pipe_slow ); 3379 %} 3380 3381 instruct Repl8F(vecY dst, regF src) %{ 3382 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3383 match(Set dst (ReplicateF src)); 3384 format %{ "pshufd $dst,$src,0x00\n\t" 3385 "vinsertf128h $dst,$dst,$dst\t! replicate8F" %} 3386 ins_encode %{ 3387 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3388 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3389 %} 3390 ins_pipe( pipe_slow ); 3391 %} 3392 3393 instruct Repl8F_mem(vecY dst, memory mem) %{ 3394 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3395 match(Set dst (ReplicateF (LoadF mem))); 3396 format %{ "pshufd $dst,$mem,0x00\n\t" 3397 "vinsertf128h $dst,$dst,$dst\t! replicate8F" %} 3398 ins_encode %{ 3399 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3400 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3401 %} 3402 ins_pipe( pipe_slow ); 3403 %} 3404 3405 instruct Repl2F_zero(vecD dst, immF0 zero) %{ 3406 predicate(n->as_Vector()->length() == 2 && UseAVX < 3); 3407 match(Set dst (ReplicateF zero)); 3408 format %{ "xorps $dst,$dst\t! replicate2F zero" %} 3409 ins_encode %{ 3410 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 3411 %} 3412 ins_pipe( fpu_reg_reg ); 3413 %} 3414 3415 instruct Repl4F_zero(vecX dst, immF0 zero) %{ 3416 predicate(n->as_Vector()->length() == 4 && UseAVX < 3); 3417 match(Set dst (ReplicateF zero)); 3418 format %{ "xorps $dst,$dst\t! replicate4F zero" %} 3419 ins_encode %{ 3420 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 3421 %} 3422 ins_pipe( fpu_reg_reg ); 3423 %} 3424 3425 instruct Repl8F_zero(vecY dst, immF0 zero) %{ 3426 predicate(n->as_Vector()->length() == 8 && UseAVX < 3); 3427 match(Set dst (ReplicateF zero)); 3428 format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %} 3429 ins_encode %{ 3430 int vector_len = 1; 3431 __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3432 %} 3433 ins_pipe( fpu_reg_reg ); 3434 %} 3435 3436 instruct Repl2D_mem(vecX dst, memory mem) %{ 3437 predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3438 match(Set dst (ReplicateD (LoadD mem))); 3439 format %{ "pshufd $dst,$mem,0x44\t! replicate2D" %} 3440 ins_encode %{ 3441 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3442 %} 3443 ins_pipe( pipe_slow ); 3444 %} 3445 3446 instruct Repl4D(vecY dst, regD src) %{ 3447 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3448 match(Set dst (ReplicateD src)); 3449 format %{ "pshufd $dst,$src,0x44\n\t" 3450 "vinsertf128h $dst,$dst,$dst\t! replicate4D" %} 3451 ins_encode %{ 3452 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3453 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3454 %} 3455 ins_pipe( pipe_slow ); 3456 %} 3457 3458 instruct Repl4D_mem(vecY dst, memory mem) %{ 3459 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3460 match(Set dst (ReplicateD (LoadD mem))); 3461 format %{ "pshufd $dst,$mem,0x44\n\t" 3462 "vinsertf128h $dst,$dst,$dst\t! 
replicate4D" %} 3463 ins_encode %{ 3464 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3465 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 3466 %} 3467 ins_pipe( pipe_slow ); 3468 %} 3469 3470 // Replicate double (8 byte) scalar zero to be vector 3471 instruct Repl2D_zero(vecX dst, immD0 zero) %{ 3472 predicate(n->as_Vector()->length() == 2 && UseAVX < 3); 3473 match(Set dst (ReplicateD zero)); 3474 format %{ "xorpd $dst,$dst\t! replicate2D zero" %} 3475 ins_encode %{ 3476 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister); 3477 %} 3478 ins_pipe( fpu_reg_reg ); 3479 %} 3480 3481 instruct Repl4D_zero(vecY dst, immD0 zero) %{ 3482 predicate(n->as_Vector()->length() == 4 && UseAVX < 3); 3483 match(Set dst (ReplicateD zero)); 3484 format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %} 3485 ins_encode %{ 3486 int vector_len = 1; 3487 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3488 %} 3489 ins_pipe( fpu_reg_reg ); 3490 %} 3491 3492 // ====================GENERIC REPLICATE========================================== 3493 3494 // Replicate byte scalar to be vector 3495 instruct Repl4B(vecS dst, rRegI src) %{ 3496 predicate(n->as_Vector()->length() == 4); 3497 match(Set dst (ReplicateB src)); 3498 format %{ "movd $dst,$src\n\t" 3499 "punpcklbw $dst,$dst\n\t" 3500 "pshuflw $dst,$dst,0x00\t! replicate4B" %} 3501 ins_encode %{ 3502 __ movdl($dst$$XMMRegister, $src$$Register); 3503 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3504 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3505 %} 3506 ins_pipe( pipe_slow ); 3507 %} 3508 3509 instruct Repl8B(vecD dst, rRegI src) %{ 3510 predicate(n->as_Vector()->length() == 8); 3511 match(Set dst (ReplicateB src)); 3512 format %{ "movd $dst,$src\n\t" 3513 "punpcklbw $dst,$dst\n\t" 3514 "pshuflw $dst,$dst,0x00\t! replicate8B" %} 3515 ins_encode %{ 3516 __ movdl($dst$$XMMRegister, $src$$Register); 3517 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3518 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3519 %} 3520 ins_pipe( pipe_slow ); 3521 %} 3522 3523 // Replicate byte scalar immediate to be vector by loading from const table. 3524 instruct Repl4B_imm(vecS dst, immI con) %{ 3525 predicate(n->as_Vector()->length() == 4); 3526 match(Set dst (ReplicateB con)); 3527 format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %} 3528 ins_encode %{ 3529 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1))); 3530 %} 3531 ins_pipe( pipe_slow ); 3532 %} 3533 3534 instruct Repl8B_imm(vecD dst, immI con) %{ 3535 predicate(n->as_Vector()->length() == 8); 3536 match(Set dst (ReplicateB con)); 3537 format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %} 3538 ins_encode %{ 3539 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3540 %} 3541 ins_pipe( pipe_slow ); 3542 %} 3543 3544 // Replicate byte scalar zero to be vector 3545 instruct Repl4B_zero(vecS dst, immI0 zero) %{ 3546 predicate(n->as_Vector()->length() == 4); 3547 match(Set dst (ReplicateB zero)); 3548 format %{ "pxor $dst,$dst\t! replicate4B zero" %} 3549 ins_encode %{ 3550 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3551 %} 3552 ins_pipe( fpu_reg_reg ); 3553 %} 3554 3555 instruct Repl8B_zero(vecD dst, immI0 zero) %{ 3556 predicate(n->as_Vector()->length() == 8); 3557 match(Set dst (ReplicateB zero)); 3558 format %{ "pxor $dst,$dst\t! 
replicate8B zero" %} 3559 ins_encode %{ 3560 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3561 %} 3562 ins_pipe( fpu_reg_reg ); 3563 %} 3564 3565 instruct Repl16B_zero(vecX dst, immI0 zero) %{ 3566 predicate(n->as_Vector()->length() == 16); 3567 match(Set dst (ReplicateB zero)); 3568 format %{ "pxor $dst,$dst\t! replicate16B zero" %} 3569 ins_encode %{ 3570 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3571 %} 3572 ins_pipe( fpu_reg_reg ); 3573 %} 3574 3575 instruct Repl32B_zero(vecY dst, immI0 zero) %{ 3576 predicate(n->as_Vector()->length() == 32); 3577 match(Set dst (ReplicateB zero)); 3578 format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %} 3579 ins_encode %{ 3580 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 3581 int vector_len = 1; 3582 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3583 %} 3584 ins_pipe( fpu_reg_reg ); 3585 %} 3586 3587 // Replicate char/short (2 byte) scalar to be vector 3588 instruct Repl2S(vecS dst, rRegI src) %{ 3589 predicate(n->as_Vector()->length() == 2); 3590 match(Set dst (ReplicateS src)); 3591 format %{ "movd $dst,$src\n\t" 3592 "pshuflw $dst,$dst,0x00\t! replicate2S" %} 3593 ins_encode %{ 3594 __ movdl($dst$$XMMRegister, $src$$Register); 3595 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3596 %} 3597 ins_pipe( fpu_reg_reg ); 3598 %} 3599 3600 // Replicate char/short (2 byte) scalar immediate to be vector by loading from const table. 3601 instruct Repl2S_imm(vecS dst, immI con) %{ 3602 predicate(n->as_Vector()->length() == 2); 3603 match(Set dst (ReplicateS con)); 3604 format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %} 3605 ins_encode %{ 3606 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2))); 3607 %} 3608 ins_pipe( fpu_reg_reg ); 3609 %} 3610 3611 instruct Repl4S_imm(vecD dst, immI con) %{ 3612 predicate(n->as_Vector()->length() == 4); 3613 match(Set dst (ReplicateS con)); 3614 format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %} 3615 ins_encode %{ 3616 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3617 %} 3618 ins_pipe( fpu_reg_reg ); 3619 %} 3620 3621 // Replicate char/short (2 byte) scalar zero to be vector 3622 instruct Repl2S_zero(vecS dst, immI0 zero) %{ 3623 predicate(n->as_Vector()->length() == 2); 3624 match(Set dst (ReplicateS zero)); 3625 format %{ "pxor $dst,$dst\t! replicate2S zero" %} 3626 ins_encode %{ 3627 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3628 %} 3629 ins_pipe( fpu_reg_reg ); 3630 %} 3631 3632 instruct Repl4S_zero(vecD dst, immI0 zero) %{ 3633 predicate(n->as_Vector()->length() == 4); 3634 match(Set dst (ReplicateS zero)); 3635 format %{ "pxor $dst,$dst\t! replicate4S zero" %} 3636 ins_encode %{ 3637 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3638 %} 3639 ins_pipe( fpu_reg_reg ); 3640 %} 3641 3642 instruct Repl8S_zero(vecX dst, immI0 zero) %{ 3643 predicate(n->as_Vector()->length() == 8); 3644 match(Set dst (ReplicateS zero)); 3645 format %{ "pxor $dst,$dst\t! replicate8S zero" %} 3646 ins_encode %{ 3647 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3648 %} 3649 ins_pipe( fpu_reg_reg ); 3650 %} 3651 3652 instruct Repl16S_zero(vecY dst, immI0 zero) %{ 3653 predicate(n->as_Vector()->length() == 16); 3654 match(Set dst (ReplicateS zero)); 3655 format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %} 3656 ins_encode %{ 3657 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 
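// vector_len selects the operand width passed to the assembler:
// 0 = 128-bit, 1 = 256-bit, 2 = 512-bit.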
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar to be vector
instruct Repl2I(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Integer could be loaded into xmm register directly from memory.
instruct Repl2I_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
instruct Repl2I_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar zero to be vector
instruct Repl2I_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate2I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate4I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8I_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %}
  ins_encode %{
    // 256-bit vpxor requires AVX2; plain AVX only provides vxorps/vxorpd at this width.
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate long (8 byte) scalar to be vector
#ifdef _LP64
instruct Repl2L(vecX dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64

// Replicate long (8 byte) scalar immediate to be vector by loading from const table.
instruct Repl2L_imm(vecX dst, immL con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate long (8 byte) scalar zero to be vector
instruct Repl2L_zero(vecX dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL zero));
  format %{ "pxor $dst,$dst\t! replicate2L zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4L_zero(vecY dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %}
  ins_encode %{
    // 256-bit vpxor requires AVX2; plain AVX only provides vxorps/vxorpd at this width.
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate float (4 byte) scalar to be vector
instruct Repl2F(vecD dst, regF src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\t! replicate2F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F(vecX dst, regF src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\t! replicate4F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate double (8 bytes) scalar to be vector
instruct Repl2D(vecX dst, regD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\t! replicate2D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
  %}
  ins_pipe( pipe_slow );
%}

// ====================EVEX REPLICATE=============================================

instruct Repl4B_mem_evex(vecS dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate4B" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B_mem_evex(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! 
replicate8B" %} 3851 ins_encode %{ 3852 int vector_len = 0; 3853 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 3854 %} 3855 ins_pipe( pipe_slow ); 3856 %} 3857 3858 instruct Repl16B_evex(vecX dst, rRegI src) %{ 3859 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 3860 match(Set dst (ReplicateB src)); 3861 format %{ "vpbroadcastb $dst,$src\t! replicate16B" %} 3862 ins_encode %{ 3863 int vector_len = 0; 3864 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 3865 %} 3866 ins_pipe( pipe_slow ); 3867 %} 3868 3869 instruct Repl16B_mem_evex(vecX dst, memory mem) %{ 3870 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 3871 match(Set dst (ReplicateB (LoadB mem))); 3872 format %{ "vpbroadcastb $dst,$mem\t! replicate16B" %} 3873 ins_encode %{ 3874 int vector_len = 0; 3875 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 3876 %} 3877 ins_pipe( pipe_slow ); 3878 %} 3879 3880 instruct Repl32B_evex(vecY dst, rRegI src) %{ 3881 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); 3882 match(Set dst (ReplicateB src)); 3883 format %{ "vpbroadcastb $dst,$src\t! replicate32B" %} 3884 ins_encode %{ 3885 int vector_len = 1; 3886 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 3887 %} 3888 ins_pipe( pipe_slow ); 3889 %} 3890 3891 instruct Repl32B_mem_evex(vecY dst, memory mem) %{ 3892 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); 3893 match(Set dst (ReplicateB (LoadB mem))); 3894 format %{ "vpbroadcastb $dst,$mem\t! replicate32B" %} 3895 ins_encode %{ 3896 int vector_len = 1; 3897 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 3898 %} 3899 ins_pipe( pipe_slow ); 3900 %} 3901 3902 instruct Repl64B_evex(vecZ dst, rRegI src) %{ 3903 predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw()); 3904 match(Set dst (ReplicateB src)); 3905 format %{ "vpbroadcastb $dst,$src\t! upper replicate64B" %} 3906 ins_encode %{ 3907 int vector_len = 2; 3908 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 3909 %} 3910 ins_pipe( pipe_slow ); 3911 %} 3912 3913 instruct Repl64B_mem_evex(vecZ dst, memory mem) %{ 3914 predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw()); 3915 match(Set dst (ReplicateB (LoadB mem))); 3916 format %{ "vpbroadcastb $dst,$mem\t! replicate64B" %} 3917 ins_encode %{ 3918 int vector_len = 2; 3919 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 3920 %} 3921 ins_pipe( pipe_slow ); 3922 %} 3923 3924 instruct Repl16B_imm_evex(vecX dst, immI con) %{ 3925 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 3926 match(Set dst (ReplicateB con)); 3927 format %{ "movq $dst,[$constantaddress]\n\t" 3928 "vpbroadcastb $dst,$dst\t! replicate16B" %} 3929 ins_encode %{ 3930 int vector_len = 0; 3931 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3932 __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3933 %} 3934 ins_pipe( pipe_slow ); 3935 %} 3936 3937 instruct Repl32B_imm_evex(vecY dst, immI con) %{ 3938 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); 3939 match(Set dst (ReplicateB con)); 3940 format %{ "movq $dst,[$constantaddress]\n\t" 3941 "vpbroadcastb $dst,$dst\t! 
replicate32B" %} 3942 ins_encode %{ 3943 int vector_len = 1; 3944 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3945 __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3946 %} 3947 ins_pipe( pipe_slow ); 3948 %} 3949 3950 instruct Repl64B_imm_evex(vecZ dst, immI con) %{ 3951 predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw()); 3952 match(Set dst (ReplicateB con)); 3953 format %{ "movq $dst,[$constantaddress]\n\t" 3954 "vpbroadcastb $dst,$dst\t! upper replicate64B" %} 3955 ins_encode %{ 3956 int vector_len = 2; 3957 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3958 __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3959 %} 3960 ins_pipe( pipe_slow ); 3961 %} 3962 3963 instruct Repl64B_zero_evex(vecZ dst, immI0 zero) %{ 3964 predicate(n->as_Vector()->length() == 64 && UseAVX > 2); 3965 match(Set dst (ReplicateB zero)); 3966 format %{ "vpxor $dst k0,$dst,$dst\t! replicate64B zero" %} 3967 ins_encode %{ 3968 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 3969 int vector_len = 2; 3970 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3971 %} 3972 ins_pipe( fpu_reg_reg ); 3973 %} 3974 3975 instruct Repl4S_evex(vecD dst, rRegI src) %{ 3976 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw()); 3977 match(Set dst (ReplicateS src)); 3978 format %{ "vpbroadcastw $dst,$src\t! replicate4S" %} 3979 ins_encode %{ 3980 int vector_len = 0; 3981 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 3982 %} 3983 ins_pipe( pipe_slow ); 3984 %} 3985 3986 instruct Repl4S_mem_evex(vecD dst, memory mem) %{ 3987 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw()); 3988 match(Set dst (ReplicateS (LoadS mem))); 3989 format %{ "vpbroadcastw $dst,$mem\t! replicate4S" %} 3990 ins_encode %{ 3991 int vector_len = 0; 3992 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 3993 %} 3994 ins_pipe( pipe_slow ); 3995 %} 3996 3997 instruct Repl8S_evex(vecX dst, rRegI src) %{ 3998 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 3999 match(Set dst (ReplicateS src)); 4000 format %{ "vpbroadcastw $dst,$src\t! replicate8S" %} 4001 ins_encode %{ 4002 int vector_len = 0; 4003 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4004 %} 4005 ins_pipe( pipe_slow ); 4006 %} 4007 4008 instruct Repl8S_mem_evex(vecX dst, memory mem) %{ 4009 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 4010 match(Set dst (ReplicateS (LoadS mem))); 4011 format %{ "vpbroadcastw $dst,$mem\t! replicate8S" %} 4012 ins_encode %{ 4013 int vector_len = 0; 4014 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4015 %} 4016 ins_pipe( pipe_slow ); 4017 %} 4018 4019 instruct Repl16S_evex(vecY dst, rRegI src) %{ 4020 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4021 match(Set dst (ReplicateS src)); 4022 format %{ "vpbroadcastw $dst,$src\t! replicate16S" %} 4023 ins_encode %{ 4024 int vector_len = 1; 4025 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4026 %} 4027 ins_pipe( pipe_slow ); 4028 %} 4029 4030 instruct Repl16S_mem_evex(vecY dst, memory mem) %{ 4031 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4032 match(Set dst (ReplicateS (LoadS mem))); 4033 format %{ "vpbroadcastw $dst,$mem\t! 
replicate16S" %} 4034 ins_encode %{ 4035 int vector_len = 1; 4036 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4037 %} 4038 ins_pipe( pipe_slow ); 4039 %} 4040 4041 instruct Repl32S_evex(vecZ dst, rRegI src) %{ 4042 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw()); 4043 match(Set dst (ReplicateS src)); 4044 format %{ "vpbroadcastw $dst,$src\t! replicate32S" %} 4045 ins_encode %{ 4046 int vector_len = 2; 4047 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4048 %} 4049 ins_pipe( pipe_slow ); 4050 %} 4051 4052 instruct Repl32S_mem_evex(vecZ dst, memory mem) %{ 4053 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw()); 4054 match(Set dst (ReplicateS (LoadS mem))); 4055 format %{ "vpbroadcastw $dst,$mem\t! replicate32S" %} 4056 ins_encode %{ 4057 int vector_len = 2; 4058 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4059 %} 4060 ins_pipe( pipe_slow ); 4061 %} 4062 4063 instruct Repl8S_imm_evex(vecX dst, immI con) %{ 4064 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 4065 match(Set dst (ReplicateS con)); 4066 format %{ "movq $dst,[$constantaddress]\n\t" 4067 "vpbroadcastw $dst,$dst\t! replicate8S" %} 4068 ins_encode %{ 4069 int vector_len = 0; 4070 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4071 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4072 %} 4073 ins_pipe( pipe_slow ); 4074 %} 4075 4076 instruct Repl16S_imm_evex(vecY dst, immI con) %{ 4077 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4078 match(Set dst (ReplicateS con)); 4079 format %{ "movq $dst,[$constantaddress]\n\t" 4080 "vpbroadcastw $dst,$dst\t! replicate16S" %} 4081 ins_encode %{ 4082 int vector_len = 1; 4083 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4084 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4085 %} 4086 ins_pipe( pipe_slow ); 4087 %} 4088 4089 instruct Repl32S_imm_evex(vecZ dst, immI con) %{ 4090 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw()); 4091 match(Set dst (ReplicateS con)); 4092 format %{ "movq $dst,[$constantaddress]\n\t" 4093 "vpbroadcastw $dst,$dst\t! replicate32S" %} 4094 ins_encode %{ 4095 int vector_len = 2; 4096 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4097 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4098 %} 4099 ins_pipe( pipe_slow ); 4100 %} 4101 4102 instruct Repl32S_zero_evex(vecZ dst, immI0 zero) %{ 4103 predicate(n->as_Vector()->length() == 32 && UseAVX > 2); 4104 match(Set dst (ReplicateS zero)); 4105 format %{ "vpxor $dst k0,$dst,$dst\t! replicate32S zero" %} 4106 ins_encode %{ 4107 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 4108 int vector_len = 2; 4109 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4110 %} 4111 ins_pipe( fpu_reg_reg ); 4112 %} 4113 4114 instruct Repl4I_evex(vecX dst, rRegI src) %{ 4115 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4116 match(Set dst (ReplicateI src)); 4117 format %{ "vpbroadcastd $dst,$src\t! 
replicate4I" %} 4118 ins_encode %{ 4119 int vector_len = 0; 4120 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4121 %} 4122 ins_pipe( pipe_slow ); 4123 %} 4124 4125 instruct Repl4I_mem_evex(vecX dst, memory mem) %{ 4126 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4127 match(Set dst (ReplicateI (LoadI mem))); 4128 format %{ "vpbroadcastd $dst,$mem\t! replicate4I" %} 4129 ins_encode %{ 4130 int vector_len = 0; 4131 __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4132 %} 4133 ins_pipe( pipe_slow ); 4134 %} 4135 4136 instruct Repl8I_evex(vecY dst, rRegI src) %{ 4137 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4138 match(Set dst (ReplicateI src)); 4139 format %{ "vpbroadcastd $dst,$src\t! replicate8I" %} 4140 ins_encode %{ 4141 int vector_len = 1; 4142 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4143 %} 4144 ins_pipe( pipe_slow ); 4145 %} 4146 4147 instruct Repl8I_mem_evex(vecY dst, memory mem) %{ 4148 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4149 match(Set dst (ReplicateI (LoadI mem))); 4150 format %{ "vpbroadcastd $dst,$mem\t! replicate8I" %} 4151 ins_encode %{ 4152 int vector_len = 1; 4153 __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4154 %} 4155 ins_pipe( pipe_slow ); 4156 %} 4157 4158 instruct Repl16I_evex(vecZ dst, rRegI src) %{ 4159 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4160 match(Set dst (ReplicateI src)); 4161 format %{ "vpbroadcastd $dst,$src\t! replicate16I" %} 4162 ins_encode %{ 4163 int vector_len = 2; 4164 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4165 %} 4166 ins_pipe( pipe_slow ); 4167 %} 4168 4169 instruct Repl16I_mem_evex(vecZ dst, memory mem) %{ 4170 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4171 match(Set dst (ReplicateI (LoadI mem))); 4172 format %{ "vpbroadcastd $dst,$mem\t! replicate16I" %} 4173 ins_encode %{ 4174 int vector_len = 2; 4175 __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4176 %} 4177 ins_pipe( pipe_slow ); 4178 %} 4179 4180 instruct Repl4I_imm_evex(vecX dst, immI con) %{ 4181 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4182 match(Set dst (ReplicateI con)); 4183 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 4184 "vpbroadcastd $dst,$dst\t! replicate4I" %} 4185 ins_encode %{ 4186 int vector_len = 0; 4187 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4188 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4189 %} 4190 ins_pipe( pipe_slow ); 4191 %} 4192 4193 instruct Repl8I_imm_evex(vecY dst, immI con) %{ 4194 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4195 match(Set dst (ReplicateI con)); 4196 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 4197 "vpbroadcastd $dst,$dst\t! replicate8I" %} 4198 ins_encode %{ 4199 int vector_len = 1; 4200 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4201 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4202 %} 4203 ins_pipe( pipe_slow ); 4204 %} 4205 4206 instruct Repl16I_imm_evex(vecZ dst, immI con) %{ 4207 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4208 match(Set dst (ReplicateI con)); 4209 format %{ "movq $dst,[$constantaddress]\t! replicate16I($con)\n\t" 4210 "vpbroadcastd $dst,$dst\t! 
replicate16I" %} 4211 ins_encode %{ 4212 int vector_len = 2; 4213 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4214 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4215 %} 4216 ins_pipe( pipe_slow ); 4217 %} 4218 4219 instruct Repl16I_zero_evex(vecZ dst, immI0 zero) %{ 4220 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4221 match(Set dst (ReplicateI zero)); 4222 format %{ "vpxor $dst k0,$dst,$dst\t! replicate16I zero" %} 4223 ins_encode %{ 4224 // Use vxorpd since AVX does not have vpxor for 512-bit (AVX2 will have it). 4225 int vector_len = 2; 4226 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4227 %} 4228 ins_pipe( fpu_reg_reg ); 4229 %} 4230 4231 // Replicate long (8 byte) scalar to be vector 4232 #ifdef _LP64 4233 instruct Repl4L_evex(vecY dst, rRegL src) %{ 4234 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4235 match(Set dst (ReplicateL src)); 4236 format %{ "vpbroadcastq $dst,$src\t! replicate4L" %} 4237 ins_encode %{ 4238 int vector_len = 1; 4239 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); 4240 %} 4241 ins_pipe( pipe_slow ); 4242 %} 4243 4244 instruct Repl8L_evex(vecZ dst, rRegL src) %{ 4245 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4246 match(Set dst (ReplicateL src)); 4247 format %{ "vpbroadcastq $dst,$src\t! replicate8L" %} 4248 ins_encode %{ 4249 int vector_len = 2; 4250 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); 4251 %} 4252 ins_pipe( pipe_slow ); 4253 %} 4254 #else // _LP64 4255 instruct Repl4L_evex(vecY dst, eRegL src, regD tmp) %{ 4256 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4257 match(Set dst (ReplicateL src)); 4258 effect(TEMP dst, USE src, TEMP tmp); 4259 format %{ "movdl $dst,$src.lo\n\t" 4260 "movdl $tmp,$src.hi\n\t" 4261 "punpckldq $dst,$tmp\n\t" 4262 "vpbroadcastq $dst,$dst\t! replicate4L" %} 4263 ins_encode %{ 4264 int vector_len = 1; 4265 __ movdl($dst$$XMMRegister, $src$$Register); 4266 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4267 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4268 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4269 %} 4270 ins_pipe( pipe_slow ); 4271 %} 4272 4273 instruct Repl8L_evex(vecZ dst, eRegL src, regD tmp) %{ 4274 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4275 match(Set dst (ReplicateL src)); 4276 effect(TEMP dst, USE src, TEMP tmp); 4277 format %{ "movdl $dst,$src.lo\n\t" 4278 "movdl $tmp,$src.hi\n\t" 4279 "punpckldq $dst,$tmp\n\t" 4280 "vpbroadcastq $dst,$dst\t! replicate8L" %} 4281 ins_encode %{ 4282 int vector_len = 2; 4283 __ movdl($dst$$XMMRegister, $src$$Register); 4284 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4285 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4286 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4287 %} 4288 ins_pipe( pipe_slow ); 4289 %} 4290 #endif // _LP64 4291 4292 instruct Repl4L_imm_evex(vecY dst, immL con) %{ 4293 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4294 match(Set dst (ReplicateL con)); 4295 format %{ "movq $dst,[$constantaddress]\n\t" 4296 "vpbroadcastq $dst,$dst\t! 
replicate4L" %} 4297 ins_encode %{ 4298 int vector_len = 1; 4299 __ movq($dst$$XMMRegister, $constantaddress($con)); 4300 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4301 %} 4302 ins_pipe( pipe_slow ); 4303 %} 4304 4305 instruct Repl8L_imm_evex(vecZ dst, immL con) %{ 4306 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4307 match(Set dst (ReplicateL con)); 4308 format %{ "movq $dst,[$constantaddress]\n\t" 4309 "vpbroadcastq $dst,$dst\t! replicate8L" %} 4310 ins_encode %{ 4311 int vector_len = 2; 4312 __ movq($dst$$XMMRegister, $constantaddress($con)); 4313 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4314 %} 4315 ins_pipe( pipe_slow ); 4316 %} 4317 4318 instruct Repl2L_mem_evex(vecX dst, memory mem) %{ 4319 predicate(n->as_Vector()->length() == 2 && VM_Version::supports_avx512vl()); 4320 match(Set dst (ReplicateL (LoadL mem))); 4321 format %{ "vpbroadcastd $dst,$mem\t! replicate2L" %} 4322 ins_encode %{ 4323 int vector_len = 0; 4324 __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4325 %} 4326 ins_pipe( pipe_slow ); 4327 %} 4328 4329 instruct Repl4L_mem_evex(vecY dst, memory mem) %{ 4330 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4331 match(Set dst (ReplicateL (LoadL mem))); 4332 format %{ "vpbroadcastd $dst,$mem\t! replicate4L" %} 4333 ins_encode %{ 4334 int vector_len = 1; 4335 __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4336 %} 4337 ins_pipe( pipe_slow ); 4338 %} 4339 4340 instruct Repl8L_mem_evex(vecZ dst, memory mem) %{ 4341 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4342 match(Set dst (ReplicateL (LoadL mem))); 4343 format %{ "vpbroadcastd $dst,$mem\t! replicate8L" %} 4344 ins_encode %{ 4345 int vector_len = 2; 4346 __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4347 %} 4348 ins_pipe( pipe_slow ); 4349 %} 4350 4351 instruct Repl8L_zero_evex(vecZ dst, immL0 zero) %{ 4352 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4353 match(Set dst (ReplicateL zero)); 4354 format %{ "vpxor $dst k0,$dst,$dst\t! replicate8L zero" %} 4355 ins_encode %{ 4356 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 4357 int vector_len = 2; 4358 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4359 %} 4360 ins_pipe( fpu_reg_reg ); 4361 %} 4362 4363 instruct Repl8F_evex(vecY dst, regF src) %{ 4364 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4365 match(Set dst (ReplicateF src)); 4366 format %{ "vbroadcastss $dst,$src\t! replicate8F" %} 4367 ins_encode %{ 4368 int vector_len = 1; 4369 __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4370 %} 4371 ins_pipe( pipe_slow ); 4372 %} 4373 4374 instruct Repl8F_mem_evex(vecY dst, memory mem) %{ 4375 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4376 match(Set dst (ReplicateF (LoadF mem))); 4377 format %{ "vbroadcastss $dst,$mem\t! replicate8F" %} 4378 ins_encode %{ 4379 int vector_len = 1; 4380 __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); 4381 %} 4382 ins_pipe( pipe_slow ); 4383 %} 4384 4385 instruct Repl16F_evex(vecZ dst, regF src) %{ 4386 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4387 match(Set dst (ReplicateF src)); 4388 format %{ "vbroadcastss $dst,$src\t! 
replicate16F" %} 4389 ins_encode %{ 4390 int vector_len = 2; 4391 __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4392 %} 4393 ins_pipe( pipe_slow ); 4394 %} 4395 4396 instruct Repl16F_mem_evex(vecZ dst, memory mem) %{ 4397 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4398 match(Set dst (ReplicateF (LoadF mem))); 4399 format %{ "vbroadcastss $dst,$mem\t! replicate16F" %} 4400 ins_encode %{ 4401 int vector_len = 2; 4402 __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); 4403 %} 4404 ins_pipe( pipe_slow ); 4405 %} 4406 4407 instruct Repl2F_zero_evex(vecD dst, immF0 zero) %{ 4408 predicate(n->as_Vector()->length() == 2 && UseAVX > 2); 4409 match(Set dst (ReplicateF zero)); 4410 format %{ "vpxor $dst k0,$dst,$dst\t! replicate2F zero" %} 4411 ins_encode %{ 4412 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4413 int vector_len = 2; 4414 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4415 %} 4416 ins_pipe( fpu_reg_reg ); 4417 %} 4418 4419 instruct Repl4F_zero_evex(vecX dst, immF0 zero) %{ 4420 predicate(n->as_Vector()->length() == 4 && UseAVX > 2); 4421 match(Set dst (ReplicateF zero)); 4422 format %{ "vpxor $dst k0,$dst,$dst\t! replicate4F zero" %} 4423 ins_encode %{ 4424 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4425 int vector_len = 2; 4426 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4427 %} 4428 ins_pipe( fpu_reg_reg ); 4429 %} 4430 4431 instruct Repl8F_zero_evex(vecY dst, immF0 zero) %{ 4432 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4433 match(Set dst (ReplicateF zero)); 4434 format %{ "vpxor $dst k0,$dst,$dst\t! replicate8F zero" %} 4435 ins_encode %{ 4436 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4437 int vector_len = 2; 4438 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4439 %} 4440 ins_pipe( fpu_reg_reg ); 4441 %} 4442 4443 instruct Repl16F_zero_evex(vecZ dst, immF0 zero) %{ 4444 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4445 match(Set dst (ReplicateF zero)); 4446 format %{ "vpxor $dst k0,$dst,$dst\t! replicate16F zero" %} 4447 ins_encode %{ 4448 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4449 int vector_len = 2; 4450 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4451 %} 4452 ins_pipe( fpu_reg_reg ); 4453 %} 4454 4455 instruct Repl4D_evex(vecY dst, regD src) %{ 4456 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4457 match(Set dst (ReplicateD src)); 4458 format %{ "vbroadcastsd $dst,$src\t! replicate4D" %} 4459 ins_encode %{ 4460 int vector_len = 1; 4461 __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4462 %} 4463 ins_pipe( pipe_slow ); 4464 %} 4465 4466 instruct Repl4D_mem_evex(vecY dst, memory mem) %{ 4467 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4468 match(Set dst (ReplicateD (LoadD mem))); 4469 format %{ "vbroadcastsd $dst,$mem\t! 
replicate4D" %} 4470 ins_encode %{ 4471 int vector_len = 1; 4472 __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); 4473 %} 4474 ins_pipe( pipe_slow ); 4475 %} 4476 4477 instruct Repl8D_evex(vecZ dst, regD src) %{ 4478 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4479 match(Set dst (ReplicateD src)); 4480 format %{ "vbroadcastsd $dst,$src\t! replicate8D" %} 4481 ins_encode %{ 4482 int vector_len = 2; 4483 __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4484 %} 4485 ins_pipe( pipe_slow ); 4486 %} 4487 4488 instruct Repl8D_mem_evex(vecZ dst, memory mem) %{ 4489 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4490 match(Set dst (ReplicateD (LoadD mem))); 4491 format %{ "vbroadcastsd $dst,$mem\t! replicate8D" %} 4492 ins_encode %{ 4493 int vector_len = 2; 4494 __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); 4495 %} 4496 ins_pipe( pipe_slow ); 4497 %} 4498 4499 instruct Repl2D_zero_evex(vecX dst, immD0 zero) %{ 4500 predicate(n->as_Vector()->length() == 2 && UseAVX > 2); 4501 match(Set dst (ReplicateD zero)); 4502 format %{ "vpxor $dst k0,$dst,$dst\t! replicate2D zero" %} 4503 ins_encode %{ 4504 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4505 int vector_len = 2; 4506 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4507 %} 4508 ins_pipe( fpu_reg_reg ); 4509 %} 4510 4511 instruct Repl4D_zero_evex(vecY dst, immD0 zero) %{ 4512 predicate(n->as_Vector()->length() == 4 && UseAVX > 2); 4513 match(Set dst (ReplicateD zero)); 4514 format %{ "vpxor $dst k0,$dst,$dst\t! replicate4D zero" %} 4515 ins_encode %{ 4516 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4517 int vector_len = 2; 4518 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4519 %} 4520 ins_pipe( fpu_reg_reg ); 4521 %} 4522 4523 instruct Repl8D_zero_evex(vecZ dst, immD0 zero) %{ 4524 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4525 match(Set dst (ReplicateD zero)); 4526 format %{ "vpxor $dst k0,$dst,$dst,vect512\t! replicate8D zero" %} 4527 ins_encode %{ 4528 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4529 int vector_len = 2; 4530 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4531 %} 4532 ins_pipe( fpu_reg_reg ); 4533 %} 4534 4535 // ====================REDUCTION ARITHMETIC======================================= 4536 4537 instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4538 predicate(UseSSE > 2 && UseAVX == 0); 4539 match(Set dst (AddReductionVI src1 src2)); 4540 effect(TEMP tmp2, TEMP tmp); 4541 format %{ "movdqu $tmp2,$src2\n\t" 4542 "phaddd $tmp2,$tmp2\n\t" 4543 "movd $tmp,$src1\n\t" 4544 "paddd $tmp,$tmp2\n\t" 4545 "movd $dst,$tmp\t! 
add reduction2I" %} 4546 ins_encode %{ 4547 __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister); 4548 __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); 4549 __ movdl($tmp$$XMMRegister, $src1$$Register); 4550 __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister); 4551 __ movdl($dst$$Register, $tmp$$XMMRegister); 4552 %} 4553 ins_pipe( pipe_slow ); 4554 %} 4555 4556 instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4557 predicate(UseAVX > 0 && UseAVX < 3); 4558 match(Set dst (AddReductionVI src1 src2)); 4559 effect(TEMP tmp, TEMP tmp2); 4560 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4561 "movd $tmp2,$src1\n\t" 4562 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4563 "movd $dst,$tmp2\t! add reduction2I" %} 4564 ins_encode %{ 4565 int vector_len = 0; 4566 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4567 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4568 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 4569 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4570 %} 4571 ins_pipe( pipe_slow ); 4572 %} 4573 4574 instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4575 predicate(UseAVX > 2); 4576 match(Set dst (AddReductionVI src1 src2)); 4577 effect(TEMP tmp, TEMP tmp2); 4578 format %{ "pshufd $tmp2,$src2,0x1\n\t" 4579 "vpaddd $tmp,$src2,$tmp2\n\t" 4580 "movd $tmp2,$src1\n\t" 4581 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4582 "movd $dst,$tmp2\t! add reduction2I" %} 4583 ins_encode %{ 4584 int vector_len = 0; 4585 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 4586 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4587 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4588 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4589 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4590 %} 4591 ins_pipe( pipe_slow ); 4592 %} 4593 4594 instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4595 predicate(UseSSE > 2 && UseAVX == 0); 4596 match(Set dst (AddReductionVI src1 src2)); 4597 effect(TEMP tmp2, TEMP tmp); 4598 format %{ "movdqu $tmp2,$src2\n\t" 4599 "phaddd $tmp2,$tmp2\n\t" 4600 "phaddd $tmp2,$tmp2\n\t" 4601 "movd $tmp,$src1\n\t" 4602 "paddd $tmp,$tmp2\n\t" 4603 "movd $dst,$tmp\t! add reduction4I" %} 4604 ins_encode %{ 4605 __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister); 4606 __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); 4607 __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); 4608 __ movdl($tmp$$XMMRegister, $src1$$Register); 4609 __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister); 4610 __ movdl($dst$$Register, $tmp$$XMMRegister); 4611 %} 4612 ins_pipe( pipe_slow ); 4613 %} 4614 4615 instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4616 predicate(UseAVX > 0 && UseAVX < 3); 4617 match(Set dst (AddReductionVI src1 src2)); 4618 effect(TEMP tmp, TEMP tmp2); 4619 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4620 "vphaddd $tmp,$tmp,$tmp2\n\t" 4621 "movd $tmp2,$src1\n\t" 4622 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4623 "movd $dst,$tmp2\t! 
add reduction4I" %} 4624 ins_encode %{ 4625 int vector_len = 0; 4626 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4627 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4628 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4629 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 4630 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4631 %} 4632 ins_pipe( pipe_slow ); 4633 %} 4634 4635 instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4636 predicate(UseAVX > 2); 4637 match(Set dst (AddReductionVI src1 src2)); 4638 effect(TEMP tmp, TEMP tmp2); 4639 format %{ "pshufd $tmp2,$src2,0xE\n\t" 4640 "vpaddd $tmp,$src2,$tmp2\n\t" 4641 "pshufd $tmp2,$tmp,0x1\n\t" 4642 "vpaddd $tmp,$tmp,$tmp2\n\t" 4643 "movd $tmp2,$src1\n\t" 4644 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4645 "movd $dst,$tmp2\t! add reduction4I" %} 4646 ins_encode %{ 4647 int vector_len = 0; 4648 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 4649 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4650 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4651 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4652 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4653 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4654 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4655 %} 4656 ins_pipe( pipe_slow ); 4657 %} 4658 4659 instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 4660 predicate(UseAVX > 0 && UseAVX < 3); 4661 match(Set dst (AddReductionVI src1 src2)); 4662 effect(TEMP tmp, TEMP tmp2); 4663 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4664 "vphaddd $tmp,$tmp,$tmp2\n\t" 4665 "vextracti128 $tmp2,$tmp\n\t" 4666 "vpaddd $tmp,$tmp,$tmp2\n\t" 4667 "movd $tmp2,$src1\n\t" 4668 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4669 "movd $dst,$tmp2\t! add reduction8I" %} 4670 ins_encode %{ 4671 int vector_len = 1; 4672 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4673 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4674 __ vextracti128h($tmp2$$XMMRegister, $tmp$$XMMRegister); 4675 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4676 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4677 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4678 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4679 %} 4680 ins_pipe( pipe_slow ); 4681 %} 4682 4683 instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 4684 predicate(UseAVX > 2); 4685 match(Set dst (AddReductionVI src1 src2)); 4686 effect(TEMP tmp, TEMP tmp2); 4687 format %{ "vextracti128 $tmp,$src2\n\t" 4688 "vpaddd $tmp,$tmp,$src2\n\t" 4689 "pshufd $tmp2,$tmp,0xE\n\t" 4690 "vpaddd $tmp,$tmp,$tmp2\n\t" 4691 "pshufd $tmp2,$tmp,0x1\n\t" 4692 "vpaddd $tmp,$tmp,$tmp2\n\t" 4693 "movd $tmp2,$src1\n\t" 4694 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4695 "movd $dst,$tmp2\t! 
add reduction8I" %} 4696 ins_encode %{ 4697 int vector_len = 0; 4698 __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister); 4699 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 4700 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 4701 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4702 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4703 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4704 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4705 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4706 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4707 %} 4708 ins_pipe( pipe_slow ); 4709 %} 4710 4711 instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 4712 predicate(UseAVX > 2); 4713 match(Set dst (AddReductionVI src1 src2)); 4714 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 4715 format %{ "vextracti64x4 $tmp3,$src2\n\t" 4716 "vpaddd $tmp3,$tmp3,$src2\n\t" 4717 "vextracti128 $tmp,$tmp3\n\t" 4718 "vpaddd $tmp,$tmp,$tmp3\n\t" 4719 "pshufd $tmp2,$tmp,0xE\n\t" 4720 "vpaddd $tmp,$tmp,$tmp2\n\t" 4721 "pshufd $tmp2,$tmp,0x1\n\t" 4722 "vpaddd $tmp,$tmp,$tmp2\n\t" 4723 "movd $tmp2,$src1\n\t" 4724 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4725 "movd $dst,$tmp2\t! mul reduction16I" %} 4726 ins_encode %{ 4727 __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister); 4728 __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 4729 __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister); 4730 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 4731 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 4732 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4733 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4734 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4735 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4736 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4737 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4738 %} 4739 ins_pipe( pipe_slow ); 4740 %} 4741 4742 #ifdef _LP64 4743 instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ 4744 predicate(UseAVX > 2); 4745 match(Set dst (AddReductionVL src1 src2)); 4746 effect(TEMP tmp, TEMP tmp2); 4747 format %{ "pshufd $tmp2,$src2,0xE\n\t" 4748 "vpaddq $tmp,$src2,$tmp2\n\t" 4749 "movdq $tmp2,$src1\n\t" 4750 "vpaddq $tmp2,$tmp,$tmp2\n\t" 4751 "movdq $dst,$tmp2\t! add reduction2L" %} 4752 ins_encode %{ 4753 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 4754 __ vpaddq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 4755 __ movdq($tmp2$$XMMRegister, $src1$$Register); 4756 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4757 __ movdq($dst$$Register, $tmp2$$XMMRegister); 4758 %} 4759 ins_pipe( pipe_slow ); 4760 %} 4761 4762 instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ 4763 predicate(UseAVX > 2); 4764 match(Set dst (AddReductionVL src1 src2)); 4765 effect(TEMP tmp, TEMP tmp2); 4766 format %{ "vextracti64x2 $tmp,$src2, 0x1\n\t" 4767 "vpaddq $tmp2,$tmp,$src2\n\t" 4768 "pshufd $tmp,$tmp2,0xE\n\t" 4769 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4770 "movdq $tmp,$src1\n\t" 4771 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4772 "movdq $dst,$tmp2\t! 
add reduction4L" %} 4773 ins_encode %{ 4774 __ vextracti64x2h($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); 4775 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 4776 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 4777 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4778 __ movdq($tmp$$XMMRegister, $src1$$Register); 4779 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4780 __ movdq($dst$$Register, $tmp2$$XMMRegister); 4781 %} 4782 ins_pipe( pipe_slow ); 4783 %} 4784 4785 instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ 4786 predicate(UseAVX > 2); 4787 match(Set dst (AddReductionVL src1 src2)); 4788 effect(TEMP tmp, TEMP tmp2); 4789 format %{ "vextracti64x4 $tmp2,$src2\n\t" 4790 "vpaddq $tmp2,$tmp2,$src2\n\t" 4791 "vextracti128 $tmp,$tmp2\n\t" 4792 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4793 "pshufd $tmp,$tmp2,0xE\n\t" 4794 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4795 "movdq $tmp,$src1\n\t" 4796 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4797 "movdq $dst,$tmp2\t! add reduction8L" %} 4798 ins_encode %{ 4799 __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister); 4800 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 4801 __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister); 4802 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4803 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 4804 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4805 __ movdq($tmp$$XMMRegister, $src1$$Register); 4806 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4807 __ movdq($dst$$Register, $tmp2$$XMMRegister); 4808 %} 4809 ins_pipe( pipe_slow ); 4810 %} 4811 #endif 4812 4813 instruct rsadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{ 4814 predicate(UseSSE >= 1 && UseAVX == 0); 4815 match(Set dst (AddReductionVF src1 src2)); 4816 effect(TEMP tmp, TEMP tmp2); 4817 format %{ "movdqu $tmp,$src1\n\t" 4818 "addss $tmp,$src2\n\t" 4819 "pshufd $tmp2,$src2,0x01\n\t" 4820 "addss $tmp,$tmp2\n\t" 4821 "movdqu $dst,$tmp\t! add reduction2F" %} 4822 ins_encode %{ 4823 __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); 4824 __ addss($tmp$$XMMRegister, $src2$$XMMRegister); 4825 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01); 4826 __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister); 4827 __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister); 4828 %} 4829 ins_pipe( pipe_slow ); 4830 %} 4831 4832 instruct rvadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{ 4833 predicate(UseAVX > 0); 4834 match(Set dst (AddReductionVF src1 src2)); 4835 effect(TEMP tmp2, TEMP tmp); 4836 format %{ "vaddss $tmp2,$src1,$src2\n\t" 4837 "pshufd $tmp,$src2,0x01\n\t" 4838 "vaddss $dst,$tmp2,$tmp\t! 
add reduction2F" %} 4839 ins_encode %{ 4840 __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 4841 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4842 __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4843 %} 4844 ins_pipe( pipe_slow ); 4845 %} 4846 4847 instruct rsadd4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{ 4848 predicate(UseSSE >= 1 && UseAVX == 0); 4849 match(Set dst (AddReductionVF src1 src2)); 4850 effect(TEMP tmp, TEMP tmp2); 4851 format %{ "movdqu $tmp,$src1\n\t" 4852 "addss $tmp,$src2\n\t" 4853 "pshufd $tmp2,$src2,0x01\n\t" 4854 "addss $tmp,$tmp2\n\t" 4855 "pshufd $tmp2,$src2,0x02\n\t" 4856 "addss $tmp,$tmp2\n\t" 4857 "pshufd $tmp2,$src2,0x03\n\t" 4858 "addss $tmp,$tmp2\n\t" 4859 "movdqu $dst,$tmp\t! add reduction4F" %} 4860 ins_encode %{ 4861 __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); 4862 __ addss($tmp$$XMMRegister, $src2$$XMMRegister); 4863 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01); 4864 __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister); 4865 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02); 4866 __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister); 4867 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03); 4868 __ addss($tmp$$XMMRegister, $tmp2$$XMMRegister); 4869 __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister); 4870 %} 4871 ins_pipe( pipe_slow ); 4872 %} 4873 4874 instruct rvadd4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{ 4875 predicate(UseAVX > 0); 4876 match(Set dst (AddReductionVF src1 src2)); 4877 effect(TEMP tmp, TEMP tmp2); 4878 format %{ "vaddss $tmp2,$src1,$src2\n\t" 4879 "pshufd $tmp,$src2,0x01\n\t" 4880 "vaddss $tmp2,$tmp2,$tmp\n\t" 4881 "pshufd $tmp,$src2,0x02\n\t" 4882 "vaddss $tmp2,$tmp2,$tmp\n\t" 4883 "pshufd $tmp,$src2,0x03\n\t" 4884 "vaddss $dst,$tmp2,$tmp\t! add reduction4F" %} 4885 ins_encode %{ 4886 __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 4887 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4888 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4889 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 4890 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4891 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 4892 __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4893 %} 4894 ins_pipe( pipe_slow ); 4895 %} 4896 4897 instruct radd8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{ 4898 predicate(UseAVX > 0); 4899 match(Set dst (AddReductionVF src1 src2)); 4900 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 4901 format %{ "vaddss $tmp2,$src1,$src2\n\t" 4902 "pshufd $tmp,$src2,0x01\n\t" 4903 "vaddss $tmp2,$tmp2,$tmp\n\t" 4904 "pshufd $tmp,$src2,0x02\n\t" 4905 "vaddss $tmp2,$tmp2,$tmp\n\t" 4906 "pshufd $tmp,$src2,0x03\n\t" 4907 "vaddss $tmp2,$tmp2,$tmp\n\t" 4908 "vextractf128 $tmp3,$src2\n\t" 4909 "vaddss $tmp2,$tmp2,$tmp3\n\t" 4910 "pshufd $tmp,$tmp3,0x01\n\t" 4911 "vaddss $tmp2,$tmp2,$tmp\n\t" 4912 "pshufd $tmp,$tmp3,0x02\n\t" 4913 "vaddss $tmp2,$tmp2,$tmp\n\t" 4914 "pshufd $tmp,$tmp3,0x03\n\t" 4915 "vaddss $dst,$tmp2,$tmp\t! 
add reduction8F" %} 4916 ins_encode %{ 4917 __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 4918 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4919 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4920 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 4921 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4922 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 4923 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4924 __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister); 4925 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 4926 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); 4927 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4928 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); 4929 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4930 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); 4931 __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4932 %} 4933 ins_pipe( pipe_slow ); 4934 %} 4935 4936 instruct radd16F_reduction_reg(regF dst, regF src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 4937 predicate(UseAVX > 2); 4938 match(Set dst (AddReductionVF src1 src2)); 4939 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 4940 format %{ "vaddss $tmp2,$src1,$src2\n\t" 4941 "pshufd $tmp,$src2,0x01\n\t" 4942 "vaddss $tmp2,$tmp2,$tmp\n\t" 4943 "pshufd $tmp,$src2,0x02\n\t" 4944 "vaddss $tmp2,$tmp2,$tmp\n\t" 4945 "pshufd $tmp,$src2,0x03\n\t" 4946 "vaddss $tmp2,$tmp2,$tmp\n\t" 4947 "vextractf64x2 $tmp3,$src2, 0x1\n\t" 4948 "vaddss $tmp2,$tmp2,$tmp3\n\t" 4949 "pshufd $tmp,$tmp3,0x01\n\t" 4950 "vaddss $tmp2,$tmp2,$tmp\n\t" 4951 "pshufd $tmp,$tmp3,0x02\n\t" 4952 "vaddss $tmp2,$tmp2,$tmp\n\t" 4953 "pshufd $tmp,$tmp3,0x03\n\t" 4954 "vaddss $tmp2,$tmp2,$tmp\n\t" 4955 "vextractf64x2 $tmp3,$src2, 0x2\n\t" 4956 "vaddss $tmp2,$tmp2,$tmp3\n\t" 4957 "pshufd $tmp,$tmp3,0x01\n\t" 4958 "vaddss $tmp2,$tmp2,$tmp\n\t" 4959 "pshufd $tmp,$tmp3,0x02\n\t" 4960 "vaddss $tmp2,$tmp2,$tmp\n\t" 4961 "pshufd $tmp,$tmp3,0x03\n\t" 4962 "vaddss $tmp2,$tmp2,$tmp\n\t" 4963 "vextractf64x2 $tmp3,$src2, 0x3\n\t" 4964 "vaddss $tmp2,$tmp2,$tmp3\n\t" 4965 "pshufd $tmp,$tmp3,0x01\n\t" 4966 "vaddss $tmp2,$tmp2,$tmp\n\t" 4967 "pshufd $tmp,$tmp3,0x02\n\t" 4968 "vaddss $tmp2,$tmp2,$tmp\n\t" 4969 "pshufd $tmp,$tmp3,0x03\n\t" 4970 "vaddss $dst,$tmp2,$tmp\t! 
add reduction16F" %} 4971 ins_encode %{ 4972 __ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 4973 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4974 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4975 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 4976 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4977 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 4978 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4979 __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1); 4980 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 4981 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); 4982 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4983 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); 4984 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4985 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); 4986 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4987 __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2); 4988 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 4989 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); 4990 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4991 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); 4992 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4993 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); 4994 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4995 __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3); 4996 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 4997 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); 4998 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 4999 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); 5000 __ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5001 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); 5002 __ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5003 %} 5004 ins_pipe( pipe_slow ); 5005 %} 5006 5007 instruct rsadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{ 5008 predicate(UseSSE >= 1 && UseAVX == 0); 5009 match(Set dst (AddReductionVD src1 src2)); 5010 effect(TEMP tmp, TEMP dst); 5011 format %{ "movdqu $tmp,$src1\n\t" 5012 "addsd $tmp,$src2\n\t" 5013 "pshufd $dst,$src2,0xE\n\t" 5014 "addsd $dst,$tmp\t! add reduction2D" %} 5015 ins_encode %{ 5016 __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); 5017 __ addsd($tmp$$XMMRegister, $src2$$XMMRegister); 5018 __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE); 5019 __ addsd($dst$$XMMRegister, $tmp$$XMMRegister); 5020 %} 5021 ins_pipe( pipe_slow ); 5022 %} 5023 5024 instruct rvadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{ 5025 predicate(UseAVX > 0); 5026 match(Set dst (AddReductionVD src1 src2)); 5027 effect(TEMP tmp, TEMP tmp2); 5028 format %{ "vaddsd $tmp2,$src1,$src2\n\t" 5029 "pshufd $tmp,$src2,0xE\n\t" 5030 "vaddsd $dst,$tmp2,$tmp\t! 
add reduction2D" %} 5031 ins_encode %{ 5032 __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 5033 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5034 __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5035 %} 5036 ins_pipe( pipe_slow ); 5037 %} 5038 5039 instruct rvadd4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{ 5040 predicate(UseAVX > 0); 5041 match(Set dst (AddReductionVD src1 src2)); 5042 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5043 format %{ "vaddsd $tmp2,$src1,$src2\n\t" 5044 "pshufd $tmp,$src2,0xE\n\t" 5045 "vaddsd $tmp2,$tmp2,$tmp\n\t" 5046 "vextractf128 $tmp3,$src2\n\t" 5047 "vaddsd $tmp2,$tmp2,$tmp3\n\t" 5048 "pshufd $tmp,$tmp3,0xE\n\t" 5049 "vaddsd $dst,$tmp2,$tmp\t! add reduction4D" %} 5050 ins_encode %{ 5051 __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 5052 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5053 __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5054 __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister); 5055 __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 5056 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); 5057 __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5058 %} 5059 ins_pipe( pipe_slow ); 5060 %} 5061 5062 instruct rvadd8D_reduction_reg(regD dst, regD src1, vecZ src2, regD tmp, regD tmp2, regD tmp3) %{ 5063 predicate(UseAVX > 2); 5064 match(Set dst (AddReductionVD src1 src2)); 5065 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5066 format %{ "vaddsd $tmp2,$src1,$src2\n\t" 5067 "pshufd $tmp,$src2,0xE\n\t" 5068 "vaddsd $tmp2,$tmp2,$tmp\n\t" 5069 "vextractf64x2 $tmp3,$src2, 0x1\n\t" 5070 "vaddsd $tmp2,$tmp2,$tmp3\n\t" 5071 "pshufd $tmp,$tmp3,0xE\n\t" 5072 "vaddsd $tmp2,$tmp2,$tmp\n\t" 5073 "vextractf64x2 $tmp3,$src2, 0x2\n\t" 5074 "vaddsd $tmp2,$tmp2,$tmp3\n\t" 5075 "pshufd $tmp,$tmp3,0xE\n\t" 5076 "vaddsd $tmp2,$tmp2,$tmp\n\t" 5077 "vextractf64x2 $tmp3,$src2, 0x3\n\t" 5078 "vaddsd $tmp2,$tmp2,$tmp3\n\t" 5079 "pshufd $tmp,$tmp3,0xE\n\t" 5080 "vaddsd $dst,$tmp2,$tmp\t! 
add reduction8D" %} 5081 ins_encode %{ 5082 __ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 5083 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5084 __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5085 __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1); 5086 __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 5087 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); 5088 __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5089 __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2); 5090 __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 5091 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); 5092 __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5093 __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3); 5094 __ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 5095 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE); 5096 __ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5097 %} 5098 ins_pipe( pipe_slow ); 5099 %} 5100 5101 instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 5102 predicate(UseSSE > 3 && UseAVX == 0); 5103 match(Set dst (MulReductionVI src1 src2)); 5104 effect(TEMP tmp, TEMP tmp2); 5105 format %{ "pshufd $tmp2,$src2,0x1\n\t" 5106 "pmulld $tmp2,$src2\n\t" 5107 "movd $tmp,$src1\n\t" 5108 "pmulld $tmp2,$tmp\n\t" 5109 "movd $dst,$tmp2\t! mul reduction2I" %} 5110 ins_encode %{ 5111 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5112 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 5113 __ movdl($tmp$$XMMRegister, $src1$$Register); 5114 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5115 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5116 %} 5117 ins_pipe( pipe_slow ); 5118 %} 5119 5120 instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 5121 predicate(UseAVX > 0); 5122 match(Set dst (MulReductionVI src1 src2)); 5123 effect(TEMP tmp, TEMP tmp2); 5124 format %{ "pshufd $tmp2,$src2,0x1\n\t" 5125 "vpmulld $tmp,$src2,$tmp2\n\t" 5126 "movd $tmp2,$src1\n\t" 5127 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5128 "movd $dst,$tmp2\t! mul reduction2I" %} 5129 ins_encode %{ 5130 int vector_len = 0; 5131 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5132 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5133 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5134 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5135 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5136 %} 5137 ins_pipe( pipe_slow ); 5138 %} 5139 5140 instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 5141 predicate(UseSSE > 3 && UseAVX == 0); 5142 match(Set dst (MulReductionVI src1 src2)); 5143 effect(TEMP tmp, TEMP tmp2); 5144 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5145 "pmulld $tmp2,$src2\n\t" 5146 "pshufd $tmp,$tmp2,0x1\n\t" 5147 "pmulld $tmp2,$tmp\n\t" 5148 "movd $tmp,$src1\n\t" 5149 "pmulld $tmp2,$tmp\n\t" 5150 "movd $dst,$tmp2\t! 
mul reduction4I" %} 5151 ins_encode %{ 5152 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5153 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 5154 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1); 5155 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5156 __ movdl($tmp$$XMMRegister, $src1$$Register); 5157 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5158 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5159 %} 5160 ins_pipe( pipe_slow ); 5161 %} 5162 5163 instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 5164 predicate(UseAVX > 0); 5165 match(Set dst (MulReductionVI src1 src2)); 5166 effect(TEMP tmp, TEMP tmp2); 5167 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5168 "vpmulld $tmp,$src2,$tmp2\n\t" 5169 "pshufd $tmp2,$tmp,0x1\n\t" 5170 "vpmulld $tmp,$tmp,$tmp2\n\t" 5171 "movd $tmp2,$src1\n\t" 5172 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5173 "movd $dst,$tmp2\t! mul reduction4I" %} 5174 ins_encode %{ 5175 int vector_len = 0; 5176 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5177 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5178 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5179 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5180 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5181 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5182 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5183 %} 5184 ins_pipe( pipe_slow ); 5185 %} 5186 5187 instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 5188 predicate(UseAVX > 0); 5189 match(Set dst (MulReductionVI src1 src2)); 5190 effect(TEMP tmp, TEMP tmp2); 5191 format %{ "vextracti128 $tmp,$src2\n\t" 5192 "vpmulld $tmp,$tmp,$src2\n\t" 5193 "pshufd $tmp2,$tmp,0xE\n\t" 5194 "vpmulld $tmp,$tmp,$tmp2\n\t" 5195 "pshufd $tmp2,$tmp,0x1\n\t" 5196 "vpmulld $tmp,$tmp,$tmp2\n\t" 5197 "movd $tmp2,$src1\n\t" 5198 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5199 "movd $dst,$tmp2\t! mul reduction8I" %} 5200 ins_encode %{ 5201 int vector_len = 0; 5202 __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister); 5203 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 5204 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5205 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5206 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5207 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5208 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5209 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5210 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5211 %} 5212 ins_pipe( pipe_slow ); 5213 %} 5214 5215 instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 5216 predicate(UseAVX > 2); 5217 match(Set dst (MulReductionVI src1 src2)); 5218 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5219 format %{ "vextracti64x4 $tmp3,$src2\n\t" 5220 "vpmulld $tmp3,$tmp3,$src2\n\t" 5221 "vextracti128 $tmp,$tmp3\n\t" 5222 "vpmulld $tmp,$tmp,$src2\n\t" 5223 "pshufd $tmp2,$tmp,0xE\n\t" 5224 "vpmulld $tmp,$tmp,$tmp2\n\t" 5225 "pshufd $tmp2,$tmp,0x1\n\t" 5226 "vpmulld $tmp,$tmp,$tmp2\n\t" 5227 "movd $tmp2,$src1\n\t" 5228 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5229 "movd $dst,$tmp2\t! 
mul reduction16I" %} 5230 ins_encode %{ 5231 __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister); 5232 __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 5233 __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister); 5234 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 5235 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5236 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5237 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5238 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5239 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5240 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5241 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5242 %} 5243 ins_pipe( pipe_slow ); 5244 %} 5245 5246 #ifdef _LP64 5247 instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ 5248 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5249 match(Set dst (MulReductionVL src1 src2)); 5250 effect(TEMP tmp, TEMP tmp2); 5251 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5252 "vpmullq $tmp,$src2,$tmp2\n\t" 5253 "movdq $tmp2,$src1\n\t" 5254 "vpmullq $tmp2,$tmp,$tmp2\n\t" 5255 "movdq $dst,$tmp2\t! mul reduction2L" %} 5256 ins_encode %{ 5257 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5258 __ vpmullq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 5259 __ movdq($tmp2$$XMMRegister, $src1$$Register); 5260 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5261 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5262 %} 5263 ins_pipe( pipe_slow ); 5264 %} 5265 5266 instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ 5267 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5268 match(Set dst (MulReductionVL src1 src2)); 5269 effect(TEMP tmp, TEMP tmp2); 5270 format %{ "vextracti64x2 $tmp,$src2, 0x1\n\t" 5271 "vpmullq $tmp2,$tmp,$src2\n\t" 5272 "pshufd $tmp,$tmp2,0xE\n\t" 5273 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5274 "movdq $tmp,$src1\n\t" 5275 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5276 "movdq $dst,$tmp2\t! mul reduction4L" %} 5277 ins_encode %{ 5278 __ vextracti64x2h($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); 5279 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 5280 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5281 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5282 __ movdq($tmp$$XMMRegister, $src1$$Register); 5283 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5284 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5285 %} 5286 ins_pipe( pipe_slow ); 5287 %} 5288 5289 instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ 5290 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5291 match(Set dst (MulReductionVL src1 src2)); 5292 effect(TEMP tmp, TEMP tmp2); 5293 format %{ "vextracti64x4 $tmp2,$src2\n\t" 5294 "vpmullq $tmp2,$tmp2,$src2\n\t" 5295 "vextracti128 $tmp,$tmp2\n\t" 5296 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5297 "pshufd $tmp,$tmp2,0xE\n\t" 5298 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5299 "movdq $tmp,$src1\n\t" 5300 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5301 "movdq $dst,$tmp2\t! 
mul reduction8L" %} 5302 ins_encode %{ 5303 __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister); 5304 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 5305 __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister); 5306 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5307 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5308 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5309 __ movdq($tmp$$XMMRegister, $src1$$Register); 5310 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5311 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5312 %} 5313 ins_pipe( pipe_slow ); 5314 %} 5315 #endif 5316 5317 instruct rsmul2F_reduction(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{ 5318 predicate(UseSSE >= 1 && UseAVX == 0); 5319 match(Set dst (MulReductionVF src1 src2)); 5320 effect(TEMP tmp, TEMP tmp2); 5321 format %{ "movdqu $tmp,$src1\n\t" 5322 "mulss $tmp,$src2\n\t" 5323 "pshufd $tmp2,$src2,0x01\n\t" 5324 "mulss $tmp,$tmp2\n\t" 5325 "movdqu $dst,$tmp\t! mul reduction2F" %} 5326 ins_encode %{ 5327 __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); 5328 __ mulss($tmp$$XMMRegister, $src2$$XMMRegister); 5329 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01); 5330 __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister); 5331 __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister); 5332 %} 5333 ins_pipe( pipe_slow ); 5334 %} 5335 5336 instruct rvmul2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{ 5337 predicate(UseAVX > 0); 5338 match(Set dst (MulReductionVF src1 src2)); 5339 effect(TEMP tmp, TEMP tmp2); 5340 format %{ "vmulss $tmp2,$src1,$src2\n\t" 5341 "pshufd $tmp,$src2,0x01\n\t" 5342 "vmulss $dst,$tmp2,$tmp\t! mul reduction2F" %} 5343 ins_encode %{ 5344 __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 5345 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5346 __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5347 %} 5348 ins_pipe( pipe_slow ); 5349 %} 5350 5351 instruct rsmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{ 5352 predicate(UseSSE >= 1 && UseAVX == 0); 5353 match(Set dst (MulReductionVF src1 src2)); 5354 effect(TEMP tmp, TEMP tmp2); 5355 format %{ "movdqu $tmp,$src1\n\t" 5356 "mulss $tmp,$src2\n\t" 5357 "pshufd $tmp2,$src2,0x01\n\t" 5358 "mulss $tmp,$tmp2\n\t" 5359 "pshufd $tmp2,$src2,0x02\n\t" 5360 "mulss $tmp,$tmp2\n\t" 5361 "pshufd $tmp2,$src2,0x03\n\t" 5362 "mulss $tmp,$tmp2\n\t" 5363 "movdqu $dst,$tmp\t! 
mul reduction4F" %} 5364 ins_encode %{ 5365 __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); 5366 __ mulss($tmp$$XMMRegister, $src2$$XMMRegister); 5367 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01); 5368 __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister); 5369 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02); 5370 __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister); 5371 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03); 5372 __ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister); 5373 __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister); 5374 %} 5375 ins_pipe( pipe_slow ); 5376 %} 5377 5378 instruct rvmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{ 5379 predicate(UseAVX > 0); 5380 match(Set dst (MulReductionVF src1 src2)); 5381 effect(TEMP tmp, TEMP tmp2); 5382 format %{ "vmulss $tmp2,$src1,$src2\n\t" 5383 "pshufd $tmp,$src2,0x01\n\t" 5384 "vmulss $tmp2,$tmp2,$tmp\n\t" 5385 "pshufd $tmp,$src2,0x02\n\t" 5386 "vmulss $tmp2,$tmp2,$tmp\n\t" 5387 "pshufd $tmp,$src2,0x03\n\t" 5388 "vmulss $dst,$tmp2,$tmp\t! mul reduction4F" %} 5389 ins_encode %{ 5390 __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 5391 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5392 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5393 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5394 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5395 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5396 __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5397 %} 5398 ins_pipe( pipe_slow ); 5399 %} 5400 5401 instruct rvmul8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{ 5402 predicate(UseAVX > 0); 5403 match(Set dst (MulReductionVF src1 src2)); 5404 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5405 format %{ "vmulss $tmp2,$src1,$src2\n\t" 5406 "pshufd $tmp,$src2,0x01\n\t" 5407 "vmulss $tmp2,$tmp2,$tmp\n\t" 5408 "pshufd $tmp,$src2,0x02\n\t" 5409 "vmulss $tmp2,$tmp2,$tmp\n\t" 5410 "pshufd $tmp,$src2,0x03\n\t" 5411 "vmulss $tmp2,$tmp2,$tmp\n\t" 5412 "vextractf128 $tmp3,$src2\n\t" 5413 "vmulss $tmp2,$tmp2,$tmp3\n\t" 5414 "pshufd $tmp,$tmp3,0x01\n\t" 5415 "vmulss $tmp2,$tmp2,$tmp\n\t" 5416 "pshufd $tmp,$tmp3,0x02\n\t" 5417 "vmulss $tmp2,$tmp2,$tmp\n\t" 5418 "pshufd $tmp,$tmp3,0x03\n\t" 5419 "vmulss $dst,$tmp2,$tmp\t! 
mul reduction8F" %} 5420 ins_encode %{ 5421 __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 5422 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5423 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5424 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5425 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5426 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5427 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5428 __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister); 5429 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 5430 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); 5431 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5432 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); 5433 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5434 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); 5435 __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5436 %} 5437 ins_pipe( pipe_slow ); 5438 %} 5439 5440 instruct rvmul16F_reduction_reg(regF dst, regF src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 5441 predicate(UseAVX > 2); 5442 match(Set dst (MulReductionVF src1 src2)); 5443 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5444 format %{ "vmulss $tmp2,$src1,$src2\n\t" 5445 "pshufd $tmp,$src2,0x01\n\t" 5446 "vmulss $tmp2,$tmp2,$tmp\n\t" 5447 "pshufd $tmp,$src2,0x02\n\t" 5448 "vmulss $tmp2,$tmp2,$tmp\n\t" 5449 "pshufd $tmp,$src2,0x03\n\t" 5450 "vmulss $tmp2,$tmp2,$tmp\n\t" 5451 "vextractf32x4 $tmp3,$src2, 0x1\n\t" 5452 "vmulss $tmp2,$tmp2,$tmp3\n\t" 5453 "pshufd $tmp,$tmp3,0x01\n\t" 5454 "vmulss $tmp2,$tmp2,$tmp\n\t" 5455 "pshufd $tmp,$tmp3,0x02\n\t" 5456 "vmulss $tmp2,$tmp2,$tmp\n\t" 5457 "pshufd $tmp,$tmp3,0x03\n\t" 5458 "vmulss $tmp2,$tmp2,$tmp\n\t" 5459 "vextractf32x4 $tmp3,$src2, 0x2\n\t" 5460 "vmulss $tmp2,$tmp2,$tmp3\n\t" 5461 "pshufd $tmp,$tmp3,0x01\n\t" 5462 "vmulss $tmp2,$tmp2,$tmp\n\t" 5463 "pshufd $tmp,$tmp3,0x02\n\t" 5464 "vmulss $tmp2,$tmp2,$tmp\n\t" 5465 "pshufd $tmp,$tmp3,0x03\n\t" 5466 "vmulss $tmp2,$tmp2,$tmp\n\t" 5467 "vextractf32x4 $tmp3,$src2, 0x3\n\t" 5468 "vmulss $tmp2,$tmp2,$tmp3\n\t" 5469 "pshufd $tmp,$tmp3,0x01\n\t" 5470 "vmulss $tmp2,$tmp2,$tmp\n\t" 5471 "pshufd $tmp,$tmp3,0x02\n\t" 5472 "vmulss $tmp2,$tmp2,$tmp\n\t" 5473 "pshufd $tmp,$tmp3,0x03\n\t" 5474 "vmulss $dst,$tmp2,$tmp\t! 
mul reduction16F" %} 5475 ins_encode %{ 5476 __ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 5477 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5478 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5479 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5480 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5481 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5482 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5483 __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1); 5484 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 5485 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); 5486 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5487 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); 5488 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5489 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); 5490 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5491 __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2); 5492 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 5493 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); 5494 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5495 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); 5496 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5497 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); 5498 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5499 __ vextractf32x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3); 5500 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister); 5501 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01); 5502 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5503 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02); 5504 __ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5505 __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03); 5506 __ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister); 5507 %} 5508 ins_pipe( pipe_slow ); 5509 %} 5510 5511 instruct rsmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{ 5512 predicate(UseSSE >= 1 && UseAVX == 0); 5513 match(Set dst (MulReductionVD src1 src2)); 5514 effect(TEMP tmp, TEMP dst); 5515 format %{ "movdqu $tmp,$src1\n\t" 5516 "mulsd $tmp,$src2\n\t" 5517 "pshufd $dst,$src2,0xE\n\t" 5518 "mulsd $dst,$tmp\t! mul reduction2D" %} 5519 ins_encode %{ 5520 __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); 5521 __ mulsd($tmp$$XMMRegister, $src2$$XMMRegister); 5522 __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE); 5523 __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister); 5524 %} 5525 ins_pipe( pipe_slow ); 5526 %} 5527 5528 instruct rvmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{ 5529 predicate(UseAVX > 0); 5530 match(Set dst (MulReductionVD src1 src2)); 5531 effect(TEMP tmp, TEMP tmp2); 5532 format %{ "vmulsd $tmp2,$src1,$src2\n\t" 5533 "pshufd $tmp,$src2,0xE\n\t" 5534 "vmulsd $dst,$tmp2,$tmp\t! 
mul reduction2D" %}
  ins_encode %{
    __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vmulsd $tmp2,$src1,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmulsd $tmp2,$tmp2,$tmp\n\t"
            "vextractf128 $tmp3,$src2\n\t"
            "vmulsd $tmp2,$tmp2,$tmp3\n\t"
            "pshufd $tmp,$tmp3,0xE\n\t"
            "vmulsd $dst,$tmp2,$tmp\t! mul reduction4D" %}
  ins_encode %{
    __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
    __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
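
// All of the MulReductionVF/MulReductionVD rules share one idiom: keep a
// running scalar product in a temp register, rotate each remaining lane of
// src2 down into element 0 with pshufd, and fold it in with a scalar
// vmulss/vmulsd; 256/512-bit sources first peel off their upper 128-bit
// lanes with vextractf*. A minimal scalar sketch of what the 8D rule below
// computes (illustrative C++ only, not generated code; the function name is
// hypothetical):
//
//   double mul_reduction8D(double src1, const double src2[8]) {
//     double r = src1;              // running product starts at src1
//     for (int i = 0; i < 8; i++) {
//       r *= src2[i];               // one vmulsd per lane
//     }
//     return r;                     // final product is written to dst
//   }

instruct rvmul8D_reduction_reg(regD dst, regD src1, vecZ src2, regD tmp, regD tmp2, regD tmp3) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vmulsd $tmp2,$src1,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmulsd $tmp2,$tmp2,$tmp\n\t"
            "vextractf64x2 $tmp3,$src2, 0x1\n\t"
            "vmulsd $tmp2,$tmp2,$tmp3\n\t"
            "pshufd $tmp,$tmp3,0xE\n\t"
            "vmulsd $tmp2,$tmp2,$tmp\n\t"
            "vextractf64x2 $tmp3,$src2, 0x2\n\t"
            "vmulsd $tmp2,$tmp2,$tmp3\n\t"
            "pshufd $tmp,$tmp3,0xE\n\t"
            "vmulsd $tmp2,$tmp2,$tmp\n\t"
            "vextractf64x2 $tmp3,$src2, 0x3\n\t"
            "vmulsd $tmp2,$tmp2,$tmp3\n\t"
            "pshufd $tmp,$tmp3,0xE\n\t"
            "vmulsd $dst,$tmp2,$tmp\t!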
mul reduction8D" %}
  ins_encode %{
    __ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
    __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
    __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf64x2h($tmp3$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// ====================VECTOR ARITHMETIC=======================================

// --------------------------------- ADD --------------------------------------
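
// Every element type below gets the same three rule shapes: a destructive
// SSE form (dst = dst + src), a non-destructive three-operand AVX register
// form, and an AVX form that folds the LoadVector straight into the
// instruction's memory operand. Integer lanes wrap modulo the lane width,
// matching Java's primitive arithmetic. The vector_len constant passed to
// the MacroAssembler selects the encoded vector width; a sketch of the
// mapping (illustrative C++ only; a hypothetical helper, the rules below
// simply hard-code the constant per operand class):
//
//   int vector_len_for(int vector_size_in_bytes) {
//     if (vector_size_in_bytes <= 16) return 0;  // XMM  (vecS/vecD/vecX)
//     if (vector_size_in_bytes == 32) return 1;  // YMM  (vecY)
//     return 2;                                  // ZMM  (vecZ)
//   }

// Bytes vector add
instruct vadd4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed4B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed8B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t!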
add packed8B" %} 5667 ins_encode %{ 5668 int vector_len = 0; 5669 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5670 %} 5671 ins_pipe( pipe_slow ); 5672 %} 5673 5674 instruct vadd16B(vecX dst, vecX src) %{ 5675 predicate(n->as_Vector()->length() == 16); 5676 match(Set dst (AddVB dst src)); 5677 format %{ "paddb $dst,$src\t! add packed16B" %} 5678 ins_encode %{ 5679 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5680 %} 5681 ins_pipe( pipe_slow ); 5682 %} 5683 5684 instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{ 5685 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 5686 match(Set dst (AddVB src1 src2)); 5687 format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %} 5688 ins_encode %{ 5689 int vector_len = 0; 5690 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5691 %} 5692 ins_pipe( pipe_slow ); 5693 %} 5694 5695 instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{ 5696 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 5697 match(Set dst (AddVB src (LoadVector mem))); 5698 format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %} 5699 ins_encode %{ 5700 int vector_len = 0; 5701 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5702 %} 5703 ins_pipe( pipe_slow ); 5704 %} 5705 5706 instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{ 5707 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 5708 match(Set dst (AddVB src1 src2)); 5709 format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %} 5710 ins_encode %{ 5711 int vector_len = 1; 5712 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5713 %} 5714 ins_pipe( pipe_slow ); 5715 %} 5716 5717 instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{ 5718 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 5719 match(Set dst (AddVB src (LoadVector mem))); 5720 format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %} 5721 ins_encode %{ 5722 int vector_len = 1; 5723 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5724 %} 5725 ins_pipe( pipe_slow ); 5726 %} 5727 5728 instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ 5729 predicate(UseAVX > 2 && n->as_Vector()->length() == 64); 5730 match(Set dst (AddVB src1 src2)); 5731 format %{ "vpaddb $dst,$src1,$src2\t! add packed64B" %} 5732 ins_encode %{ 5733 int vector_len = 2; 5734 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5735 %} 5736 ins_pipe( pipe_slow ); 5737 %} 5738 5739 instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{ 5740 predicate(UseAVX > 2 && n->as_Vector()->length() == 64); 5741 match(Set dst (AddVB src (LoadVector mem))); 5742 format %{ "vpaddb $dst,$src,$mem\t! add packed64B" %} 5743 ins_encode %{ 5744 int vector_len = 2; 5745 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5746 %} 5747 ins_pipe( pipe_slow ); 5748 %} 5749 5750 // Shorts/Chars vector add 5751 instruct vadd2S(vecS dst, vecS src) %{ 5752 predicate(n->as_Vector()->length() == 2); 5753 match(Set dst (AddVS dst src)); 5754 format %{ "paddw $dst,$src\t! add packed2S" %} 5755 ins_encode %{ 5756 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5757 %} 5758 ins_pipe( pipe_slow ); 5759 %} 5760 5761 instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{ 5762 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5763 match(Set dst (AddVS src1 src2)); 5764 format %{ "vpaddw $dst,$src1,$src2\t! 
add packed2S" %} 5765 ins_encode %{ 5766 int vector_len = 0; 5767 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5768 %} 5769 ins_pipe( pipe_slow ); 5770 %} 5771 5772 instruct vadd2S_mem(vecS dst, vecS src, memory mem) %{ 5773 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5774 match(Set dst (AddVS src (LoadVector mem))); 5775 format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %} 5776 ins_encode %{ 5777 int vector_len = 0; 5778 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5779 %} 5780 ins_pipe( pipe_slow ); 5781 %} 5782 5783 instruct vadd4S(vecD dst, vecD src) %{ 5784 predicate(n->as_Vector()->length() == 4); 5785 match(Set dst (AddVS dst src)); 5786 format %{ "paddw $dst,$src\t! add packed4S" %} 5787 ins_encode %{ 5788 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5789 %} 5790 ins_pipe( pipe_slow ); 5791 %} 5792 5793 instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{ 5794 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5795 match(Set dst (AddVS src1 src2)); 5796 format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %} 5797 ins_encode %{ 5798 int vector_len = 0; 5799 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5800 %} 5801 ins_pipe( pipe_slow ); 5802 %} 5803 5804 instruct vadd4S_mem(vecD dst, vecD src, memory mem) %{ 5805 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5806 match(Set dst (AddVS src (LoadVector mem))); 5807 format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %} 5808 ins_encode %{ 5809 int vector_len = 0; 5810 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5811 %} 5812 ins_pipe( pipe_slow ); 5813 %} 5814 5815 instruct vadd8S(vecX dst, vecX src) %{ 5816 predicate(n->as_Vector()->length() == 8); 5817 match(Set dst (AddVS dst src)); 5818 format %{ "paddw $dst,$src\t! add packed8S" %} 5819 ins_encode %{ 5820 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5821 %} 5822 ins_pipe( pipe_slow ); 5823 %} 5824 5825 instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{ 5826 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 5827 match(Set dst (AddVS src1 src2)); 5828 format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %} 5829 ins_encode %{ 5830 int vector_len = 0; 5831 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5832 %} 5833 ins_pipe( pipe_slow ); 5834 %} 5835 5836 instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{ 5837 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 5838 match(Set dst (AddVS src (LoadVector mem))); 5839 format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %} 5840 ins_encode %{ 5841 int vector_len = 0; 5842 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5843 %} 5844 ins_pipe( pipe_slow ); 5845 %} 5846 5847 instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{ 5848 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 5849 match(Set dst (AddVS src1 src2)); 5850 format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %} 5851 ins_encode %{ 5852 int vector_len = 1; 5853 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5854 %} 5855 ins_pipe( pipe_slow ); 5856 %} 5857 5858 instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{ 5859 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 5860 match(Set dst (AddVS src (LoadVector mem))); 5861 format %{ "vpaddw $dst,$src,$mem\t! 
add packed16S" %} 5862 ins_encode %{ 5863 int vector_len = 1; 5864 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5865 %} 5866 ins_pipe( pipe_slow ); 5867 %} 5868 5869 instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ 5870 predicate(UseAVX > 2 && n->as_Vector()->length() == 32); 5871 match(Set dst (AddVS src1 src2)); 5872 format %{ "vpaddw $dst,$src1,$src2\t! add packed32S" %} 5873 ins_encode %{ 5874 int vector_len = 2; 5875 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5876 %} 5877 ins_pipe( pipe_slow ); 5878 %} 5879 5880 instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{ 5881 predicate(UseAVX > 2 && n->as_Vector()->length() == 32); 5882 match(Set dst (AddVS src (LoadVector mem))); 5883 format %{ "vpaddw $dst,$src,$mem\t! add packed32S" %} 5884 ins_encode %{ 5885 int vector_len = 2; 5886 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5887 %} 5888 ins_pipe( pipe_slow ); 5889 %} 5890 5891 // Integers vector add 5892 instruct vadd2I(vecD dst, vecD src) %{ 5893 predicate(n->as_Vector()->length() == 2); 5894 match(Set dst (AddVI dst src)); 5895 format %{ "paddd $dst,$src\t! add packed2I" %} 5896 ins_encode %{ 5897 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 5898 %} 5899 ins_pipe( pipe_slow ); 5900 %} 5901 5902 instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{ 5903 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5904 match(Set dst (AddVI src1 src2)); 5905 format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %} 5906 ins_encode %{ 5907 int vector_len = 0; 5908 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5909 %} 5910 ins_pipe( pipe_slow ); 5911 %} 5912 5913 instruct vadd2I_mem(vecD dst, vecD src, memory mem) %{ 5914 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 5915 match(Set dst (AddVI src (LoadVector mem))); 5916 format %{ "vpaddd $dst,$src,$mem\t! add packed2I" %} 5917 ins_encode %{ 5918 int vector_len = 0; 5919 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5920 %} 5921 ins_pipe( pipe_slow ); 5922 %} 5923 5924 instruct vadd4I(vecX dst, vecX src) %{ 5925 predicate(n->as_Vector()->length() == 4); 5926 match(Set dst (AddVI dst src)); 5927 format %{ "paddd $dst,$src\t! add packed4I" %} 5928 ins_encode %{ 5929 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 5930 %} 5931 ins_pipe( pipe_slow ); 5932 %} 5933 5934 instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{ 5935 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5936 match(Set dst (AddVI src1 src2)); 5937 format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %} 5938 ins_encode %{ 5939 int vector_len = 0; 5940 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5941 %} 5942 ins_pipe( pipe_slow ); 5943 %} 5944 5945 instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{ 5946 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5947 match(Set dst (AddVI src (LoadVector mem))); 5948 format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %} 5949 ins_encode %{ 5950 int vector_len = 0; 5951 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5952 %} 5953 ins_pipe( pipe_slow ); 5954 %} 5955 5956 instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{ 5957 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 5958 match(Set dst (AddVI src1 src2)); 5959 format %{ "vpaddd $dst,$src1,$src2\t! 
add packed8I" %} 5960 ins_encode %{ 5961 int vector_len = 1; 5962 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5963 %} 5964 ins_pipe( pipe_slow ); 5965 %} 5966 5967 instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{ 5968 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 5969 match(Set dst (AddVI src (LoadVector mem))); 5970 format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %} 5971 ins_encode %{ 5972 int vector_len = 1; 5973 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5974 %} 5975 ins_pipe( pipe_slow ); 5976 %} 5977 5978 instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ 5979 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 5980 match(Set dst (AddVI src1 src2)); 5981 format %{ "vpaddd $dst,$src1,$src2\t! add packed16I" %} 5982 ins_encode %{ 5983 int vector_len = 2; 5984 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5985 %} 5986 ins_pipe( pipe_slow ); 5987 %} 5988 5989 instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{ 5990 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 5991 match(Set dst (AddVI src (LoadVector mem))); 5992 format %{ "vpaddd $dst,$src,$mem\t! add packed16I" %} 5993 ins_encode %{ 5994 int vector_len = 2; 5995 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5996 %} 5997 ins_pipe( pipe_slow ); 5998 %} 5999 6000 // Longs vector add 6001 instruct vadd2L(vecX dst, vecX src) %{ 6002 predicate(n->as_Vector()->length() == 2); 6003 match(Set dst (AddVL dst src)); 6004 format %{ "paddq $dst,$src\t! add packed2L" %} 6005 ins_encode %{ 6006 __ paddq($dst$$XMMRegister, $src$$XMMRegister); 6007 %} 6008 ins_pipe( pipe_slow ); 6009 %} 6010 6011 instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{ 6012 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6013 match(Set dst (AddVL src1 src2)); 6014 format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %} 6015 ins_encode %{ 6016 int vector_len = 0; 6017 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6018 %} 6019 ins_pipe( pipe_slow ); 6020 %} 6021 6022 instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{ 6023 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6024 match(Set dst (AddVL src (LoadVector mem))); 6025 format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %} 6026 ins_encode %{ 6027 int vector_len = 0; 6028 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6029 %} 6030 ins_pipe( pipe_slow ); 6031 %} 6032 6033 instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{ 6034 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 6035 match(Set dst (AddVL src1 src2)); 6036 format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %} 6037 ins_encode %{ 6038 int vector_len = 1; 6039 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6040 %} 6041 ins_pipe( pipe_slow ); 6042 %} 6043 6044 instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{ 6045 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 6046 match(Set dst (AddVL src (LoadVector mem))); 6047 format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %} 6048 ins_encode %{ 6049 int vector_len = 1; 6050 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6051 %} 6052 ins_pipe( pipe_slow ); 6053 %} 6054 6055 instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6056 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 6057 match(Set dst (AddVL src1 src2)); 6058 format %{ "vpaddq $dst,$src1,$src2\t! 
add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector add
instruct vadd2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed2F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed4F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
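
// The 256-bit packed-float rules below need only AVX1 (UseAVX > 0), unlike
// the 256-bit integer adds above, which require AVX2 (UseAVX > 1): AVX1
// already provides full-width floating-point lanes. Each lane is an
// independent IEEE-754 add with no cross-lane interaction; a sketch of the
// packed8F semantics (illustrative C++ only):
//
//   for (int i = 0; i < 8; i++) {
//     dst[i] = src1[i] + src2[i];   // one lane of vaddps
//   }

instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t!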
add packed8F" %} 6157 ins_encode %{ 6158 int vector_len = 1; 6159 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6160 %} 6161 ins_pipe( pipe_slow ); 6162 %} 6163 6164 instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6165 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 6166 match(Set dst (AddVF src1 src2)); 6167 format %{ "vaddps $dst,$src1,$src2\t! add packed16F" %} 6168 ins_encode %{ 6169 int vector_len = 2; 6170 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6171 %} 6172 ins_pipe( pipe_slow ); 6173 %} 6174 6175 instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{ 6176 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 6177 match(Set dst (AddVF src (LoadVector mem))); 6178 format %{ "vaddps $dst,$src,$mem\t! add packed16F" %} 6179 ins_encode %{ 6180 int vector_len = 2; 6181 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6182 %} 6183 ins_pipe( pipe_slow ); 6184 %} 6185 6186 // Doubles vector add 6187 instruct vadd2D(vecX dst, vecX src) %{ 6188 predicate(n->as_Vector()->length() == 2); 6189 match(Set dst (AddVD dst src)); 6190 format %{ "addpd $dst,$src\t! add packed2D" %} 6191 ins_encode %{ 6192 __ addpd($dst$$XMMRegister, $src$$XMMRegister); 6193 %} 6194 ins_pipe( pipe_slow ); 6195 %} 6196 6197 instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{ 6198 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6199 match(Set dst (AddVD src1 src2)); 6200 format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %} 6201 ins_encode %{ 6202 int vector_len = 0; 6203 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6204 %} 6205 ins_pipe( pipe_slow ); 6206 %} 6207 6208 instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{ 6209 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6210 match(Set dst (AddVD src (LoadVector mem))); 6211 format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %} 6212 ins_encode %{ 6213 int vector_len = 0; 6214 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6215 %} 6216 ins_pipe( pipe_slow ); 6217 %} 6218 6219 instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{ 6220 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6221 match(Set dst (AddVD src1 src2)); 6222 format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %} 6223 ins_encode %{ 6224 int vector_len = 1; 6225 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6226 %} 6227 ins_pipe( pipe_slow ); 6228 %} 6229 6230 instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{ 6231 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6232 match(Set dst (AddVD src (LoadVector mem))); 6233 format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %} 6234 ins_encode %{ 6235 int vector_len = 1; 6236 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6237 %} 6238 ins_pipe( pipe_slow ); 6239 %} 6240 6241 instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6242 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 6243 match(Set dst (AddVD src1 src2)); 6244 format %{ "vaddpd $dst,$src1,$src2\t! add packed8D" %} 6245 ins_encode %{ 6246 int vector_len = 2; 6247 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6248 %} 6249 ins_pipe( pipe_slow ); 6250 %} 6251 6252 instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{ 6253 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 6254 match(Set dst (AddVD src (LoadVector mem))); 6255 format %{ "vaddpd $dst,$src,$mem\t! 
add packed8D" %} 6256 ins_encode %{ 6257 int vector_len = 2; 6258 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6259 %} 6260 ins_pipe( pipe_slow ); 6261 %} 6262 6263 // --------------------------------- SUB -------------------------------------- 6264 6265 // Bytes vector sub 6266 instruct vsub4B(vecS dst, vecS src) %{ 6267 predicate(n->as_Vector()->length() == 4); 6268 match(Set dst (SubVB dst src)); 6269 format %{ "psubb $dst,$src\t! sub packed4B" %} 6270 ins_encode %{ 6271 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 6272 %} 6273 ins_pipe( pipe_slow ); 6274 %} 6275 6276 instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{ 6277 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6278 match(Set dst (SubVB src1 src2)); 6279 format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %} 6280 ins_encode %{ 6281 int vector_len = 0; 6282 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6283 %} 6284 ins_pipe( pipe_slow ); 6285 %} 6286 6287 instruct vsub4B_mem(vecS dst, vecS src, memory mem) %{ 6288 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6289 match(Set dst (SubVB src (LoadVector mem))); 6290 format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %} 6291 ins_encode %{ 6292 int vector_len = 0; 6293 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6294 %} 6295 ins_pipe( pipe_slow ); 6296 %} 6297 6298 instruct vsub8B(vecD dst, vecD src) %{ 6299 predicate(n->as_Vector()->length() == 8); 6300 match(Set dst (SubVB dst src)); 6301 format %{ "psubb $dst,$src\t! sub packed8B" %} 6302 ins_encode %{ 6303 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 6304 %} 6305 ins_pipe( pipe_slow ); 6306 %} 6307 6308 instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{ 6309 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6310 match(Set dst (SubVB src1 src2)); 6311 format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %} 6312 ins_encode %{ 6313 int vector_len = 0; 6314 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6315 %} 6316 ins_pipe( pipe_slow ); 6317 %} 6318 6319 instruct vsub8B_mem(vecD dst, vecD src, memory mem) %{ 6320 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6321 match(Set dst (SubVB src (LoadVector mem))); 6322 format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %} 6323 ins_encode %{ 6324 int vector_len = 0; 6325 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6326 %} 6327 ins_pipe( pipe_slow ); 6328 %} 6329 6330 instruct vsub16B(vecX dst, vecX src) %{ 6331 predicate(n->as_Vector()->length() == 16); 6332 match(Set dst (SubVB dst src)); 6333 format %{ "psubb $dst,$src\t! sub packed16B" %} 6334 ins_encode %{ 6335 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 6336 %} 6337 ins_pipe( pipe_slow ); 6338 %} 6339 6340 instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{ 6341 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 6342 match(Set dst (SubVB src1 src2)); 6343 format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %} 6344 ins_encode %{ 6345 int vector_len = 0; 6346 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6347 %} 6348 ins_pipe( pipe_slow ); 6349 %} 6350 6351 instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{ 6352 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 6353 match(Set dst (SubVB src (LoadVector mem))); 6354 format %{ "vpsubb $dst,$src,$mem\t! 
sub packed16B" %} 6355 ins_encode %{ 6356 int vector_len = 0; 6357 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6358 %} 6359 ins_pipe( pipe_slow ); 6360 %} 6361 6362 instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{ 6363 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 6364 match(Set dst (SubVB src1 src2)); 6365 format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %} 6366 ins_encode %{ 6367 int vector_len = 1; 6368 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6369 %} 6370 ins_pipe( pipe_slow ); 6371 %} 6372 6373 instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{ 6374 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 6375 match(Set dst (SubVB src (LoadVector mem))); 6376 format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %} 6377 ins_encode %{ 6378 int vector_len = 1; 6379 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6380 %} 6381 ins_pipe( pipe_slow ); 6382 %} 6383 6384 instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6385 predicate(UseAVX > 2 && n->as_Vector()->length() == 64); 6386 match(Set dst (SubVB src1 src2)); 6387 format %{ "vpsubb $dst,$src1,$src2\t! sub packed64B" %} 6388 ins_encode %{ 6389 int vector_len = 2; 6390 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6391 %} 6392 ins_pipe( pipe_slow ); 6393 %} 6394 6395 instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{ 6396 predicate(UseAVX > 2 && n->as_Vector()->length() == 64); 6397 match(Set dst (SubVB src (LoadVector mem))); 6398 format %{ "vpsubb $dst,$src,$mem\t! sub packed64B" %} 6399 ins_encode %{ 6400 int vector_len = 2; 6401 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6402 %} 6403 ins_pipe( pipe_slow ); 6404 %} 6405 6406 // Shorts/Chars vector sub 6407 instruct vsub2S(vecS dst, vecS src) %{ 6408 predicate(n->as_Vector()->length() == 2); 6409 match(Set dst (SubVS dst src)); 6410 format %{ "psubw $dst,$src\t! sub packed2S" %} 6411 ins_encode %{ 6412 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 6413 %} 6414 ins_pipe( pipe_slow ); 6415 %} 6416 6417 instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{ 6418 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6419 match(Set dst (SubVS src1 src2)); 6420 format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %} 6421 ins_encode %{ 6422 int vector_len = 0; 6423 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6424 %} 6425 ins_pipe( pipe_slow ); 6426 %} 6427 6428 instruct vsub2S_mem(vecS dst, vecS src, memory mem) %{ 6429 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6430 match(Set dst (SubVS src (LoadVector mem))); 6431 format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %} 6432 ins_encode %{ 6433 int vector_len = 0; 6434 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6435 %} 6436 ins_pipe( pipe_slow ); 6437 %} 6438 6439 instruct vsub4S(vecD dst, vecD src) %{ 6440 predicate(n->as_Vector()->length() == 4); 6441 match(Set dst (SubVS dst src)); 6442 format %{ "psubw $dst,$src\t! sub packed4S" %} 6443 ins_encode %{ 6444 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 6445 %} 6446 ins_pipe( pipe_slow ); 6447 %} 6448 6449 instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{ 6450 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6451 match(Set dst (SubVS src1 src2)); 6452 format %{ "vpsubw $dst,$src1,$src2\t! 
sub packed4S" %} 6453 ins_encode %{ 6454 int vector_len = 0; 6455 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6456 %} 6457 ins_pipe( pipe_slow ); 6458 %} 6459 6460 instruct vsub4S_mem(vecD dst, vecD src, memory mem) %{ 6461 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6462 match(Set dst (SubVS src (LoadVector mem))); 6463 format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %} 6464 ins_encode %{ 6465 int vector_len = 0; 6466 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6467 %} 6468 ins_pipe( pipe_slow ); 6469 %} 6470 6471 instruct vsub8S(vecX dst, vecX src) %{ 6472 predicate(n->as_Vector()->length() == 8); 6473 match(Set dst (SubVS dst src)); 6474 format %{ "psubw $dst,$src\t! sub packed8S" %} 6475 ins_encode %{ 6476 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 6477 %} 6478 ins_pipe( pipe_slow ); 6479 %} 6480 6481 instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{ 6482 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6483 match(Set dst (SubVS src1 src2)); 6484 format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} 6485 ins_encode %{ 6486 int vector_len = 0; 6487 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6488 %} 6489 ins_pipe( pipe_slow ); 6490 %} 6491 6492 instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{ 6493 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6494 match(Set dst (SubVS src (LoadVector mem))); 6495 format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %} 6496 ins_encode %{ 6497 int vector_len = 0; 6498 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6499 %} 6500 ins_pipe( pipe_slow ); 6501 %} 6502 6503 instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{ 6504 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 6505 match(Set dst (SubVS src1 src2)); 6506 format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} 6507 ins_encode %{ 6508 int vector_len = 1; 6509 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6510 %} 6511 ins_pipe( pipe_slow ); 6512 %} 6513 6514 instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{ 6515 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 6516 match(Set dst (SubVS src (LoadVector mem))); 6517 format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %} 6518 ins_encode %{ 6519 int vector_len = 1; 6520 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6521 %} 6522 ins_pipe( pipe_slow ); 6523 %} 6524 6525 instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6526 predicate(UseAVX > 2 && n->as_Vector()->length() == 32); 6527 match(Set dst (SubVS src1 src2)); 6528 format %{ "vpsubw $dst,$src1,$src2\t! sub packed32S" %} 6529 ins_encode %{ 6530 int vector_len = 2; 6531 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6532 %} 6533 ins_pipe( pipe_slow ); 6534 %} 6535 6536 instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{ 6537 predicate(UseAVX > 2 && n->as_Vector()->length() == 32); 6538 match(Set dst (SubVS src (LoadVector mem))); 6539 format %{ "vpsubw $dst,$src,$mem\t! sub packed32S" %} 6540 ins_encode %{ 6541 int vector_len = 2; 6542 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6543 %} 6544 ins_pipe( pipe_slow ); 6545 %} 6546 6547 // Integers vector sub 6548 instruct vsub2I(vecD dst, vecD src) %{ 6549 predicate(n->as_Vector()->length() == 2); 6550 match(Set dst (SubVI dst src)); 6551 format %{ "psubd $dst,$src\t! 
sub packed2I" %} 6552 ins_encode %{ 6553 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 6554 %} 6555 ins_pipe( pipe_slow ); 6556 %} 6557 6558 instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{ 6559 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6560 match(Set dst (SubVI src1 src2)); 6561 format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %} 6562 ins_encode %{ 6563 int vector_len = 0; 6564 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6565 %} 6566 ins_pipe( pipe_slow ); 6567 %} 6568 6569 instruct vsub2I_mem(vecD dst, vecD src, memory mem) %{ 6570 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6571 match(Set dst (SubVI src (LoadVector mem))); 6572 format %{ "vpsubd $dst,$src,$mem\t! sub packed2I" %} 6573 ins_encode %{ 6574 int vector_len = 0; 6575 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6576 %} 6577 ins_pipe( pipe_slow ); 6578 %} 6579 6580 instruct vsub4I(vecX dst, vecX src) %{ 6581 predicate(n->as_Vector()->length() == 4); 6582 match(Set dst (SubVI dst src)); 6583 format %{ "psubd $dst,$src\t! sub packed4I" %} 6584 ins_encode %{ 6585 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 6586 %} 6587 ins_pipe( pipe_slow ); 6588 %} 6589 6590 instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{ 6591 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6592 match(Set dst (SubVI src1 src2)); 6593 format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %} 6594 ins_encode %{ 6595 int vector_len = 0; 6596 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6597 %} 6598 ins_pipe( pipe_slow ); 6599 %} 6600 6601 instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{ 6602 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6603 match(Set dst (SubVI src (LoadVector mem))); 6604 format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %} 6605 ins_encode %{ 6606 int vector_len = 0; 6607 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6608 %} 6609 ins_pipe( pipe_slow ); 6610 %} 6611 6612 instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{ 6613 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 6614 match(Set dst (SubVI src1 src2)); 6615 format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %} 6616 ins_encode %{ 6617 int vector_len = 1; 6618 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6619 %} 6620 ins_pipe( pipe_slow ); 6621 %} 6622 6623 instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{ 6624 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 6625 match(Set dst (SubVI src (LoadVector mem))); 6626 format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %} 6627 ins_encode %{ 6628 int vector_len = 1; 6629 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6630 %} 6631 ins_pipe( pipe_slow ); 6632 %} 6633 6634 instruct vsub16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6635 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 6636 match(Set dst (SubVI src1 src2)); 6637 format %{ "vpsubd $dst,$src1,$src2\t! sub packed16I" %} 6638 ins_encode %{ 6639 int vector_len = 2; 6640 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6641 %} 6642 ins_pipe( pipe_slow ); 6643 %} 6644 6645 instruct vsub16I_mem(vecZ dst, vecZ src, memory mem) %{ 6646 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 6647 match(Set dst (SubVI src (LoadVector mem))); 6648 format %{ "vpsubd $dst,$src,$mem\t! 
sub packed16I" %} 6649 ins_encode %{ 6650 int vector_len = 2; 6651 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6652 %} 6653 ins_pipe( pipe_slow ); 6654 %} 6655 6656 // Longs vector sub 6657 instruct vsub2L(vecX dst, vecX src) %{ 6658 predicate(n->as_Vector()->length() == 2); 6659 match(Set dst (SubVL dst src)); 6660 format %{ "psubq $dst,$src\t! sub packed2L" %} 6661 ins_encode %{ 6662 __ psubq($dst$$XMMRegister, $src$$XMMRegister); 6663 %} 6664 ins_pipe( pipe_slow ); 6665 %} 6666 6667 instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{ 6668 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6669 match(Set dst (SubVL src1 src2)); 6670 format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %} 6671 ins_encode %{ 6672 int vector_len = 0; 6673 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6674 %} 6675 ins_pipe( pipe_slow ); 6676 %} 6677 6678 instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{ 6679 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6680 match(Set dst (SubVL src (LoadVector mem))); 6681 format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %} 6682 ins_encode %{ 6683 int vector_len = 0; 6684 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6685 %} 6686 ins_pipe( pipe_slow ); 6687 %} 6688 6689 instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{ 6690 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 6691 match(Set dst (SubVL src1 src2)); 6692 format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %} 6693 ins_encode %{ 6694 int vector_len = 1; 6695 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6696 %} 6697 ins_pipe( pipe_slow ); 6698 %} 6699 6700 instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{ 6701 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 6702 match(Set dst (SubVL src (LoadVector mem))); 6703 format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %} 6704 ins_encode %{ 6705 int vector_len = 1; 6706 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6707 %} 6708 ins_pipe( pipe_slow ); 6709 %} 6710 6711 instruct vsub8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6712 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 6713 match(Set dst (SubVL src1 src2)); 6714 format %{ "vpsubq $dst,$src1,$src2\t! sub packed8L" %} 6715 ins_encode %{ 6716 int vector_len = 2; 6717 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6718 %} 6719 ins_pipe( pipe_slow ); 6720 %} 6721 6722 instruct vsub8L_mem(vecZ dst, vecZ src, memory mem) %{ 6723 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 6724 match(Set dst (SubVL src (LoadVector mem))); 6725 format %{ "vpsubq $dst,$src,$mem\t! sub packed8L" %} 6726 ins_encode %{ 6727 int vector_len = 2; 6728 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6729 %} 6730 ins_pipe( pipe_slow ); 6731 %} 6732 6733 // Floats vector sub 6734 instruct vsub2F(vecD dst, vecD src) %{ 6735 predicate(n->as_Vector()->length() == 2); 6736 match(Set dst (SubVF dst src)); 6737 format %{ "subps $dst,$src\t! sub packed2F" %} 6738 ins_encode %{ 6739 __ subps($dst$$XMMRegister, $src$$XMMRegister); 6740 %} 6741 ins_pipe( pipe_slow ); 6742 %} 6743 6744 instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{ 6745 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6746 match(Set dst (SubVF src1 src2)); 6747 format %{ "vsubps $dst,$src1,$src2\t! 
sub packed2F" %} 6748 ins_encode %{ 6749 int vector_len = 0; 6750 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6751 %} 6752 ins_pipe( pipe_slow ); 6753 %} 6754 6755 instruct vsub2F_mem(vecD dst, vecD src, memory mem) %{ 6756 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6757 match(Set dst (SubVF src (LoadVector mem))); 6758 format %{ "vsubps $dst,$src,$mem\t! sub packed2F" %} 6759 ins_encode %{ 6760 int vector_len = 0; 6761 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6762 %} 6763 ins_pipe( pipe_slow ); 6764 %} 6765 6766 instruct vsub4F(vecX dst, vecX src) %{ 6767 predicate(n->as_Vector()->length() == 4); 6768 match(Set dst (SubVF dst src)); 6769 format %{ "subps $dst,$src\t! sub packed4F" %} 6770 ins_encode %{ 6771 __ subps($dst$$XMMRegister, $src$$XMMRegister); 6772 %} 6773 ins_pipe( pipe_slow ); 6774 %} 6775 6776 instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{ 6777 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6778 match(Set dst (SubVF src1 src2)); 6779 format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %} 6780 ins_encode %{ 6781 int vector_len = 0; 6782 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6783 %} 6784 ins_pipe( pipe_slow ); 6785 %} 6786 6787 instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{ 6788 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6789 match(Set dst (SubVF src (LoadVector mem))); 6790 format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %} 6791 ins_encode %{ 6792 int vector_len = 0; 6793 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6794 %} 6795 ins_pipe( pipe_slow ); 6796 %} 6797 6798 instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{ 6799 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6800 match(Set dst (SubVF src1 src2)); 6801 format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %} 6802 ins_encode %{ 6803 int vector_len = 1; 6804 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6805 %} 6806 ins_pipe( pipe_slow ); 6807 %} 6808 6809 instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{ 6810 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6811 match(Set dst (SubVF src (LoadVector mem))); 6812 format %{ "vsubps $dst,$src,$mem\t! sub packed8F" %} 6813 ins_encode %{ 6814 int vector_len = 1; 6815 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6816 %} 6817 ins_pipe( pipe_slow ); 6818 %} 6819 6820 instruct vsub16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6821 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 6822 match(Set dst (SubVF src1 src2)); 6823 format %{ "vsubps $dst,$src1,$src2\t! sub packed16F" %} 6824 ins_encode %{ 6825 int vector_len = 2; 6826 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6827 %} 6828 ins_pipe( pipe_slow ); 6829 %} 6830 6831 instruct vsub16F_mem(vecZ dst, vecZ src, memory mem) %{ 6832 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 6833 match(Set dst (SubVF src (LoadVector mem))); 6834 format %{ "vsubps $dst,$src,$mem\t! sub packed16F" %} 6835 ins_encode %{ 6836 int vector_len = 2; 6837 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6838 %} 6839 ins_pipe( pipe_slow ); 6840 %} 6841 6842 // Doubles vector sub 6843 instruct vsub2D(vecX dst, vecX src) %{ 6844 predicate(n->as_Vector()->length() == 2); 6845 match(Set dst (SubVD dst src)); 6846 format %{ "subpd $dst,$src\t! 
sub packed2D" %}
  ins_encode %{
    __ subpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- MUL --------------------------------------
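
// pmullw/vpmullw form the full 32-bit product in each 16-bit lane and keep
// only its low 16 bits, which matches Java's short/char multiply followed
// by the implicit narrowing. Per-lane semantics (illustrative C++ only):
//
//   for (int i = 0; i < n; i++) {
//     dst[i] = (jshort)(src1[i] * src2[i]);  // truncated to low 16 bits
//   }

// Shorts/Chars vector mul
instruct vmul2S(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed2S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t!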
mul packed2S" %} 6936 ins_encode %{ 6937 int vector_len = 0; 6938 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6939 %} 6940 ins_pipe( pipe_slow ); 6941 %} 6942 6943 instruct vmul2S_mem(vecS dst, vecS src, memory mem) %{ 6944 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6945 match(Set dst (MulVS src (LoadVector mem))); 6946 format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} 6947 ins_encode %{ 6948 int vector_len = 0; 6949 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6950 %} 6951 ins_pipe( pipe_slow ); 6952 %} 6953 6954 instruct vmul4S(vecD dst, vecD src) %{ 6955 predicate(n->as_Vector()->length() == 4); 6956 match(Set dst (MulVS dst src)); 6957 format %{ "pmullw $dst,$src\t! mul packed4S" %} 6958 ins_encode %{ 6959 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 6960 %} 6961 ins_pipe( pipe_slow ); 6962 %} 6963 6964 instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{ 6965 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6966 match(Set dst (MulVS src1 src2)); 6967 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 6968 ins_encode %{ 6969 int vector_len = 0; 6970 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6971 %} 6972 ins_pipe( pipe_slow ); 6973 %} 6974 6975 instruct vmul4S_mem(vecD dst, vecD src, memory mem) %{ 6976 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6977 match(Set dst (MulVS src (LoadVector mem))); 6978 format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} 6979 ins_encode %{ 6980 int vector_len = 0; 6981 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6982 %} 6983 ins_pipe( pipe_slow ); 6984 %} 6985 6986 instruct vmul8S(vecX dst, vecX src) %{ 6987 predicate(n->as_Vector()->length() == 8); 6988 match(Set dst (MulVS dst src)); 6989 format %{ "pmullw $dst,$src\t! mul packed8S" %} 6990 ins_encode %{ 6991 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 6992 %} 6993 ins_pipe( pipe_slow ); 6994 %} 6995 6996 instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{ 6997 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6998 match(Set dst (MulVS src1 src2)); 6999 format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} 7000 ins_encode %{ 7001 int vector_len = 0; 7002 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7003 %} 7004 ins_pipe( pipe_slow ); 7005 %} 7006 7007 instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{ 7008 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7009 match(Set dst (MulVS src (LoadVector mem))); 7010 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} 7011 ins_encode %{ 7012 int vector_len = 0; 7013 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7014 %} 7015 ins_pipe( pipe_slow ); 7016 %} 7017 7018 instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{ 7019 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 7020 match(Set dst (MulVS src1 src2)); 7021 format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} 7022 ins_encode %{ 7023 int vector_len = 1; 7024 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7025 %} 7026 ins_pipe( pipe_slow ); 7027 %} 7028 7029 instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{ 7030 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 7031 match(Set dst (MulVS src (LoadVector mem))); 7032 format %{ "vpmullw $dst,$src,$mem\t! 
mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector mul (sse4_1)
instruct vmul2I(vecD dst, vecD src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed2I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I(vecX dst, vecX src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed4I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
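
// There is no packed 64-bit low multiply before AVX-512DQ (pre-AVX-512 SSE
// and AVX provide only 32-bit forms such as pmulld and pmuludq), so the
// MulVL rules below match only when VM_Version::supports_avx512dq() is true
// and vpmullq is available. Per-lane semantics (illustrative C++ only):
//
//   for (int i = 0; i < n; i++) {
//     dst[i] = (jlong)((julong)src1[i] * (julong)src2[i]);  // low 64 bits
//   }

instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t!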
instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector mul
instruct vmul2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed2F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed4F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector mul
instruct vmul2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD dst src));
  format %{ "mulpd $dst,$src\t! mul packed2D" %}
  ins_encode %{
    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst k0,$src1,$src2\t! mul packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst k0,$src,$mem\t! mul packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

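// Conditional move for packed doubles, AVX1/AVX2 only (editorial note on
// the mechanics of the pattern below): cmppd writes an all-ones/all-zeros
// mask into each lane of $dst, and the blend then uses those mask lanes to
// pick between the corresponding $src1 and $src2 lanes, which is why $dst
// is declared TEMP and doubles as the compare result.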
instruct vcmov4D_reg(vecY dst, vecY src1, vecY src2, immI8 cop, cmpOp_vcmppd copnd) %{
  predicate(UseAVX > 0 && UseAVX < 3 && n->as_Vector()->length() == 4);
  match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2)));
  effect(TEMP dst, USE src1, USE src2);
  format %{ "cmppd.$copnd $dst, $src1, $src2 ! vcmovevd, cond=$cop\n\t"
            "vpblendd $dst,$src1,$src2,$dst ! vcmovevd\n\t"
         %}
  ins_encode %{
    int vector_len = 1;
    int cond = (Assembler::Condition)($copnd$$cmpcode);
    __ cmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len);
    __ vpblendd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- DIV --------------------------------------

// Floats vector div
instruct vdiv2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed2F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed4F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector div
instruct vdiv2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD dst src));
  format %{ "divpd $dst,$src\t! div packed2D" %}
  ins_encode %{
    __ divpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ Shift ---------------------------------------

// Left and right shift count vectors are the same on x86
// (only the lowest bits of the xmm reg are used for the count).
instruct vshiftcnt(vecS dst, rRegI cnt) %{
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "movd $dst,$cnt\t! load shift count" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $cnt$$Register);
  %}
  ins_pipe( pipe_slow );
%}

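// Note (editorial): movdl zero-extends the 32-bit count into the low
// doubleword of the xmm register, and the packed shift instructions
// (psllw/pslld/psllq, psrlw/psrld/psrlq, psraw/psrad) take their count
// from the low 64 bits of the count operand, so a single loaded value
// serves both shift directions.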

// --------------------------------- Sqrt --------------------------------------

// Floating point vector sqrt - double precision only
instruct vsqrt2D_reg(vecX dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt2D_mem(vecX dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4D_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4D_mem(vecY dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8D_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8D_mem(vecZ dst, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ LeftShift -----------------------------------

// Shorts/Chars vector left shift
instruct vsll2S(vecS dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_imm(vecS dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector left shift
instruct vsll2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector left shift
instruct vsll2L(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8L_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ----------------------- LogicalRightShift -----------------------------------

// Shorts vector logical right shift produces an incorrect Java result
// for negative data because Java code converts a short value into an int
// with sign extension before the shift. But char vectors are fine since
// chars are unsigned values.
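//
// Worked example of the mismatch (editorial illustration, not from the
// original source): take s = (short)0xFFFC (-4) and a shift count of 2.
//   Java:  (int)s == 0xFFFFFFFC, and 0xFFFFFFFC >>> 2 == 0x3FFFFFFF,
//          whose low 16 bits are 0xFFFF;
//   psrlw: shifts each 16-bit lane, giving 0xFFFC >> 2 == 0x3FFF.
// The packed per-lane result (0x3FFF) differs from the Java result (0xFFFF).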
logical right shift packed4S" %} 8172 ins_encode %{ 8173 int vector_len = 0; 8174 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8175 %} 8176 ins_pipe( pipe_slow ); 8177 %} 8178 8179 instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ 8180 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8181 match(Set dst (URShiftVS src shift)); 8182 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 8183 ins_encode %{ 8184 int vector_len = 0; 8185 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8186 %} 8187 ins_pipe( pipe_slow ); 8188 %} 8189 8190 instruct vsrl8S(vecX dst, vecS shift) %{ 8191 predicate(n->as_Vector()->length() == 8); 8192 match(Set dst (URShiftVS dst shift)); 8193 format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} 8194 ins_encode %{ 8195 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); 8196 %} 8197 ins_pipe( pipe_slow ); 8198 %} 8199 8200 instruct vsrl8S_imm(vecX dst, immI8 shift) %{ 8201 predicate(n->as_Vector()->length() == 8); 8202 match(Set dst (URShiftVS dst shift)); 8203 format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} 8204 ins_encode %{ 8205 __ psrlw($dst$$XMMRegister, (int)$shift$$constant); 8206 %} 8207 ins_pipe( pipe_slow ); 8208 %} 8209 8210 instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{ 8211 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8212 match(Set dst (URShiftVS src shift)); 8213 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 8214 ins_encode %{ 8215 int vector_len = 0; 8216 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8217 %} 8218 ins_pipe( pipe_slow ); 8219 %} 8220 8221 instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ 8222 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8223 match(Set dst (URShiftVS src shift)); 8224 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 8225 ins_encode %{ 8226 int vector_len = 0; 8227 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8228 %} 8229 ins_pipe( pipe_slow ); 8230 %} 8231 8232 instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{ 8233 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 8234 match(Set dst (URShiftVS src shift)); 8235 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 8236 ins_encode %{ 8237 int vector_len = 1; 8238 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8239 %} 8240 ins_pipe( pipe_slow ); 8241 %} 8242 8243 instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ 8244 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 8245 match(Set dst (URShiftVS src shift)); 8246 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 8247 ins_encode %{ 8248 int vector_len = 1; 8249 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8250 %} 8251 ins_pipe( pipe_slow ); 8252 %} 8253 8254 instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{ 8255 predicate(UseAVX > 2 && n->as_Vector()->length() == 32); 8256 match(Set dst (URShiftVS src shift)); 8257 format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed32S" %} 8258 ins_encode %{ 8259 int vector_len = 2; 8260 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8261 %} 8262 ins_pipe( pipe_slow ); 8263 %} 8264 8265 instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 8266 predicate(UseAVX > 2 && n->as_Vector()->length() == 32); 8267 match(Set dst (URShiftVS src shift)); 8268 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %} 8269 ins_encode %{ 8270 int vector_len = 2; 8271 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8272 %} 8273 ins_pipe( pipe_slow ); 8274 %} 8275 8276 // Integers vector logical right shift 8277 instruct vsrl2I(vecD dst, vecS shift) %{ 8278 predicate(n->as_Vector()->length() == 2); 8279 match(Set dst (URShiftVI dst shift)); 8280 format %{ "psrld $dst,$shift\t! logical right shift packed2I" %} 8281 ins_encode %{ 8282 __ psrld($dst$$XMMRegister, $shift$$XMMRegister); 8283 %} 8284 ins_pipe( pipe_slow ); 8285 %} 8286 8287 instruct vsrl2I_imm(vecD dst, immI8 shift) %{ 8288 predicate(n->as_Vector()->length() == 2); 8289 match(Set dst (URShiftVI dst shift)); 8290 format %{ "psrld $dst,$shift\t! logical right shift packed2I" %} 8291 ins_encode %{ 8292 __ psrld($dst$$XMMRegister, (int)$shift$$constant); 8293 %} 8294 ins_pipe( pipe_slow ); 8295 %} 8296 8297 instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{ 8298 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8299 match(Set dst (URShiftVI src shift)); 8300 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} 8301 ins_encode %{ 8302 int vector_len = 0; 8303 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8304 %} 8305 ins_pipe( pipe_slow ); 8306 %} 8307 8308 instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 8309 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8310 match(Set dst (URShiftVI src shift)); 8311 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} 8312 ins_encode %{ 8313 int vector_len = 0; 8314 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8315 %} 8316 ins_pipe( pipe_slow ); 8317 %} 8318 8319 instruct vsrl4I(vecX dst, vecS shift) %{ 8320 predicate(n->as_Vector()->length() == 4); 8321 match(Set dst (URShiftVI dst shift)); 8322 format %{ "psrld $dst,$shift\t! logical right shift packed4I" %} 8323 ins_encode %{ 8324 __ psrld($dst$$XMMRegister, $shift$$XMMRegister); 8325 %} 8326 ins_pipe( pipe_slow ); 8327 %} 8328 8329 instruct vsrl4I_imm(vecX dst, immI8 shift) %{ 8330 predicate(n->as_Vector()->length() == 4); 8331 match(Set dst (URShiftVI dst shift)); 8332 format %{ "psrld $dst,$shift\t! logical right shift packed4I" %} 8333 ins_encode %{ 8334 __ psrld($dst$$XMMRegister, (int)$shift$$constant); 8335 %} 8336 ins_pipe( pipe_slow ); 8337 %} 8338 8339 instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{ 8340 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8341 match(Set dst (URShiftVI src shift)); 8342 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %} 8343 ins_encode %{ 8344 int vector_len = 0; 8345 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8346 %} 8347 ins_pipe( pipe_slow ); 8348 %} 8349 8350 instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 8351 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8352 match(Set dst (URShiftVI src shift)); 8353 format %{ "vpsrld $dst,$src,$shift\t! 
logical right shift packed4I" %} 8354 ins_encode %{ 8355 int vector_len = 0; 8356 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8357 %} 8358 ins_pipe( pipe_slow ); 8359 %} 8360 8361 instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{ 8362 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 8363 match(Set dst (URShiftVI src shift)); 8364 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %} 8365 ins_encode %{ 8366 int vector_len = 1; 8367 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8368 %} 8369 ins_pipe( pipe_slow ); 8370 %} 8371 8372 instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 8373 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 8374 match(Set dst (URShiftVI src shift)); 8375 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %} 8376 ins_encode %{ 8377 int vector_len = 1; 8378 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8379 %} 8380 ins_pipe( pipe_slow ); 8381 %} 8382 8383 instruct vsrl16I_reg(vecZ dst, vecZ src, vecS shift) %{ 8384 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8385 match(Set dst (URShiftVI src shift)); 8386 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %} 8387 ins_encode %{ 8388 int vector_len = 2; 8389 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8390 %} 8391 ins_pipe( pipe_slow ); 8392 %} 8393 8394 instruct vsrl16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 8395 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8396 match(Set dst (URShiftVI src shift)); 8397 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %} 8398 ins_encode %{ 8399 int vector_len = 2; 8400 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8401 %} 8402 ins_pipe( pipe_slow ); 8403 %} 8404 8405 // Longs vector logical right shift 8406 instruct vsrl2L(vecX dst, vecS shift) %{ 8407 predicate(n->as_Vector()->length() == 2); 8408 match(Set dst (URShiftVL dst shift)); 8409 format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %} 8410 ins_encode %{ 8411 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); 8412 %} 8413 ins_pipe( pipe_slow ); 8414 %} 8415 8416 instruct vsrl2L_imm(vecX dst, immI8 shift) %{ 8417 predicate(n->as_Vector()->length() == 2); 8418 match(Set dst (URShiftVL dst shift)); 8419 format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %} 8420 ins_encode %{ 8421 __ psrlq($dst$$XMMRegister, (int)$shift$$constant); 8422 %} 8423 ins_pipe( pipe_slow ); 8424 %} 8425 8426 instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{ 8427 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8428 match(Set dst (URShiftVL src shift)); 8429 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %} 8430 ins_encode %{ 8431 int vector_len = 0; 8432 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8433 %} 8434 ins_pipe( pipe_slow ); 8435 %} 8436 8437 instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ 8438 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8439 match(Set dst (URShiftVL src shift)); 8440 format %{ "vpsrlq $dst,$src,$shift\t! 
logical right shift packed2L" %} 8441 ins_encode %{ 8442 int vector_len = 0; 8443 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8444 %} 8445 ins_pipe( pipe_slow ); 8446 %} 8447 8448 instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{ 8449 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 8450 match(Set dst (URShiftVL src shift)); 8451 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %} 8452 ins_encode %{ 8453 int vector_len = 1; 8454 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8455 %} 8456 ins_pipe( pipe_slow ); 8457 %} 8458 8459 instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{ 8460 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 8461 match(Set dst (URShiftVL src shift)); 8462 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %} 8463 ins_encode %{ 8464 int vector_len = 1; 8465 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8466 %} 8467 ins_pipe( pipe_slow ); 8468 %} 8469 8470 instruct vsrl8L_reg(vecZ dst, vecZ src, vecS shift) %{ 8471 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8472 match(Set dst (URShiftVL src shift)); 8473 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %} 8474 ins_encode %{ 8475 int vector_len = 2; 8476 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8477 %} 8478 ins_pipe( pipe_slow ); 8479 %} 8480 8481 instruct vsrl8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 8482 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8483 match(Set dst (URShiftVL src shift)); 8484 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %} 8485 ins_encode %{ 8486 int vector_len = 2; 8487 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8488 %} 8489 ins_pipe( pipe_slow ); 8490 %} 8491 8492 // ------------------- ArithmeticRightShift ----------------------------------- 8493 8494 // Shorts/Chars vector arithmetic right shift 8495 instruct vsra2S(vecS dst, vecS shift) %{ 8496 predicate(n->as_Vector()->length() == 2); 8497 match(Set dst (RShiftVS dst shift)); 8498 format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %} 8499 ins_encode %{ 8500 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); 8501 %} 8502 ins_pipe( pipe_slow ); 8503 %} 8504 8505 instruct vsra2S_imm(vecS dst, immI8 shift) %{ 8506 predicate(n->as_Vector()->length() == 2); 8507 match(Set dst (RShiftVS dst shift)); 8508 format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %} 8509 ins_encode %{ 8510 __ psraw($dst$$XMMRegister, (int)$shift$$constant); 8511 %} 8512 ins_pipe( pipe_slow ); 8513 %} 8514 8515 instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{ 8516 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8517 match(Set dst (RShiftVS src shift)); 8518 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} 8519 ins_encode %{ 8520 int vector_len = 0; 8521 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8522 %} 8523 ins_pipe( pipe_slow ); 8524 %} 8525 8526 instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ 8527 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8528 match(Set dst (RShiftVS src shift)); 8529 format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed2S" %} 8530 ins_encode %{ 8531 int vector_len = 0; 8532 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8533 %} 8534 ins_pipe( pipe_slow ); 8535 %} 8536 8537 instruct vsra4S(vecD dst, vecS shift) %{ 8538 predicate(n->as_Vector()->length() == 4); 8539 match(Set dst (RShiftVS dst shift)); 8540 format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} 8541 ins_encode %{ 8542 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); 8543 %} 8544 ins_pipe( pipe_slow ); 8545 %} 8546 8547 instruct vsra4S_imm(vecD dst, immI8 shift) %{ 8548 predicate(n->as_Vector()->length() == 4); 8549 match(Set dst (RShiftVS dst shift)); 8550 format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} 8551 ins_encode %{ 8552 __ psraw($dst$$XMMRegister, (int)$shift$$constant); 8553 %} 8554 ins_pipe( pipe_slow ); 8555 %} 8556 8557 instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{ 8558 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8559 match(Set dst (RShiftVS src shift)); 8560 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 8561 ins_encode %{ 8562 int vector_len = 0; 8563 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8564 %} 8565 ins_pipe( pipe_slow ); 8566 %} 8567 8568 instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ 8569 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8570 match(Set dst (RShiftVS src shift)); 8571 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 8572 ins_encode %{ 8573 int vector_len = 0; 8574 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8575 %} 8576 ins_pipe( pipe_slow ); 8577 %} 8578 8579 instruct vsra8S(vecX dst, vecS shift) %{ 8580 predicate(n->as_Vector()->length() == 8); 8581 match(Set dst (RShiftVS dst shift)); 8582 format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %} 8583 ins_encode %{ 8584 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); 8585 %} 8586 ins_pipe( pipe_slow ); 8587 %} 8588 8589 instruct vsra8S_imm(vecX dst, immI8 shift) %{ 8590 predicate(n->as_Vector()->length() == 8); 8591 match(Set dst (RShiftVS dst shift)); 8592 format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %} 8593 ins_encode %{ 8594 __ psraw($dst$$XMMRegister, (int)$shift$$constant); 8595 %} 8596 ins_pipe( pipe_slow ); 8597 %} 8598 8599 instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{ 8600 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8601 match(Set dst (RShiftVS src shift)); 8602 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 8603 ins_encode %{ 8604 int vector_len = 0; 8605 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8606 %} 8607 ins_pipe( pipe_slow ); 8608 %} 8609 8610 instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ 8611 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8612 match(Set dst (RShiftVS src shift)); 8613 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 8614 ins_encode %{ 8615 int vector_len = 0; 8616 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8617 %} 8618 ins_pipe( pipe_slow ); 8619 %} 8620 8621 instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{ 8622 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 8623 match(Set dst (RShiftVS src shift)); 8624 format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed16S" %} 8625 ins_encode %{ 8626 int vector_len = 1; 8627 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8628 %} 8629 ins_pipe( pipe_slow ); 8630 %} 8631 8632 instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ 8633 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 8634 match(Set dst (RShiftVS src shift)); 8635 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} 8636 ins_encode %{ 8637 int vector_len = 1; 8638 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8639 %} 8640 ins_pipe( pipe_slow ); 8641 %} 8642 8643 instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{ 8644 predicate(UseAVX > 2 && n->as_Vector()->length() == 32); 8645 match(Set dst (RShiftVS src shift)); 8646 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %} 8647 ins_encode %{ 8648 int vector_len = 2; 8649 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8650 %} 8651 ins_pipe( pipe_slow ); 8652 %} 8653 8654 instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 8655 predicate(UseAVX > 2 && n->as_Vector()->length() == 32); 8656 match(Set dst (RShiftVS src shift)); 8657 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %} 8658 ins_encode %{ 8659 int vector_len = 2; 8660 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8661 %} 8662 ins_pipe( pipe_slow ); 8663 %} 8664 8665 // Integers vector arithmetic right shift 8666 instruct vsra2I(vecD dst, vecS shift) %{ 8667 predicate(n->as_Vector()->length() == 2); 8668 match(Set dst (RShiftVI dst shift)); 8669 format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %} 8670 ins_encode %{ 8671 __ psrad($dst$$XMMRegister, $shift$$XMMRegister); 8672 %} 8673 ins_pipe( pipe_slow ); 8674 %} 8675 8676 instruct vsra2I_imm(vecD dst, immI8 shift) %{ 8677 predicate(n->as_Vector()->length() == 2); 8678 match(Set dst (RShiftVI dst shift)); 8679 format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %} 8680 ins_encode %{ 8681 __ psrad($dst$$XMMRegister, (int)$shift$$constant); 8682 %} 8683 ins_pipe( pipe_slow ); 8684 %} 8685 8686 instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{ 8687 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8688 match(Set dst (RShiftVI src shift)); 8689 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %} 8690 ins_encode %{ 8691 int vector_len = 0; 8692 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8693 %} 8694 ins_pipe( pipe_slow ); 8695 %} 8696 8697 instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 8698 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8699 match(Set dst (RShiftVI src shift)); 8700 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %} 8701 ins_encode %{ 8702 int vector_len = 0; 8703 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8704 %} 8705 ins_pipe( pipe_slow ); 8706 %} 8707 8708 instruct vsra4I(vecX dst, vecS shift) %{ 8709 predicate(n->as_Vector()->length() == 4); 8710 match(Set dst (RShiftVI dst shift)); 8711 format %{ "psrad $dst,$shift\t! 
arithmetic right shift packed4I" %} 8712 ins_encode %{ 8713 __ psrad($dst$$XMMRegister, $shift$$XMMRegister); 8714 %} 8715 ins_pipe( pipe_slow ); 8716 %} 8717 8718 instruct vsra4I_imm(vecX dst, immI8 shift) %{ 8719 predicate(n->as_Vector()->length() == 4); 8720 match(Set dst (RShiftVI dst shift)); 8721 format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %} 8722 ins_encode %{ 8723 __ psrad($dst$$XMMRegister, (int)$shift$$constant); 8724 %} 8725 ins_pipe( pipe_slow ); 8726 %} 8727 8728 instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{ 8729 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8730 match(Set dst (RShiftVI src shift)); 8731 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %} 8732 ins_encode %{ 8733 int vector_len = 0; 8734 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8735 %} 8736 ins_pipe( pipe_slow ); 8737 %} 8738 8739 instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 8740 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8741 match(Set dst (RShiftVI src shift)); 8742 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %} 8743 ins_encode %{ 8744 int vector_len = 0; 8745 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8746 %} 8747 ins_pipe( pipe_slow ); 8748 %} 8749 8750 instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{ 8751 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 8752 match(Set dst (RShiftVI src shift)); 8753 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %} 8754 ins_encode %{ 8755 int vector_len = 1; 8756 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8757 %} 8758 ins_pipe( pipe_slow ); 8759 %} 8760 8761 instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 8762 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 8763 match(Set dst (RShiftVI src shift)); 8764 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %} 8765 ins_encode %{ 8766 int vector_len = 1; 8767 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8768 %} 8769 ins_pipe( pipe_slow ); 8770 %} 8771 8772 instruct vsra16I_reg(vecZ dst, vecZ src, vecS shift) %{ 8773 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8774 match(Set dst (RShiftVI src shift)); 8775 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %} 8776 ins_encode %{ 8777 int vector_len = 2; 8778 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8779 %} 8780 ins_pipe( pipe_slow ); 8781 %} 8782 8783 instruct vsra16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 8784 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8785 match(Set dst (RShiftVI src shift)); 8786 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %} 8787 ins_encode %{ 8788 int vector_len = 2; 8789 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8790 %} 8791 ins_pipe( pipe_slow ); 8792 %} 8793 8794 // There are no longs vector arithmetic right shift instructions. 8795 8796 8797 // --------------------------------- AND -------------------------------------- 8798 8799 instruct vand4B(vecS dst, vecS src) %{ 8800 predicate(n->as_Vector()->length_in_bytes() == 4); 8801 match(Set dst (AndV dst src)); 8802 format %{ "pand $dst,$src\t! 
and vectors (4 bytes)" %} 8803 ins_encode %{ 8804 __ pand($dst$$XMMRegister, $src$$XMMRegister); 8805 %} 8806 ins_pipe( pipe_slow ); 8807 %} 8808 8809 instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{ 8810 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 8811 match(Set dst (AndV src1 src2)); 8812 format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %} 8813 ins_encode %{ 8814 int vector_len = 0; 8815 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8816 %} 8817 ins_pipe( pipe_slow ); 8818 %} 8819 8820 instruct vand4B_mem(vecS dst, vecS src, memory mem) %{ 8821 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 8822 match(Set dst (AndV src (LoadVector mem))); 8823 format %{ "vpand $dst,$src,$mem\t! and vectors (4 bytes)" %} 8824 ins_encode %{ 8825 int vector_len = 0; 8826 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8827 %} 8828 ins_pipe( pipe_slow ); 8829 %} 8830 8831 instruct vand8B(vecD dst, vecD src) %{ 8832 predicate(n->as_Vector()->length_in_bytes() == 8); 8833 match(Set dst (AndV dst src)); 8834 format %{ "pand $dst,$src\t! and vectors (8 bytes)" %} 8835 ins_encode %{ 8836 __ pand($dst$$XMMRegister, $src$$XMMRegister); 8837 %} 8838 ins_pipe( pipe_slow ); 8839 %} 8840 8841 instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{ 8842 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 8843 match(Set dst (AndV src1 src2)); 8844 format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %} 8845 ins_encode %{ 8846 int vector_len = 0; 8847 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8848 %} 8849 ins_pipe( pipe_slow ); 8850 %} 8851 8852 instruct vand8B_mem(vecD dst, vecD src, memory mem) %{ 8853 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 8854 match(Set dst (AndV src (LoadVector mem))); 8855 format %{ "vpand $dst,$src,$mem\t! and vectors (8 bytes)" %} 8856 ins_encode %{ 8857 int vector_len = 0; 8858 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8859 %} 8860 ins_pipe( pipe_slow ); 8861 %} 8862 8863 instruct vand16B(vecX dst, vecX src) %{ 8864 predicate(n->as_Vector()->length_in_bytes() == 16); 8865 match(Set dst (AndV dst src)); 8866 format %{ "pand $dst,$src\t! and vectors (16 bytes)" %} 8867 ins_encode %{ 8868 __ pand($dst$$XMMRegister, $src$$XMMRegister); 8869 %} 8870 ins_pipe( pipe_slow ); 8871 %} 8872 8873 instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{ 8874 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 8875 match(Set dst (AndV src1 src2)); 8876 format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %} 8877 ins_encode %{ 8878 int vector_len = 0; 8879 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8880 %} 8881 ins_pipe( pipe_slow ); 8882 %} 8883 8884 instruct vand16B_mem(vecX dst, vecX src, memory mem) %{ 8885 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 8886 match(Set dst (AndV src (LoadVector mem))); 8887 format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %} 8888 ins_encode %{ 8889 int vector_len = 0; 8890 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8891 %} 8892 ins_pipe( pipe_slow ); 8893 %} 8894 8895 instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{ 8896 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 8897 match(Set dst (AndV src1 src2)); 8898 format %{ "vpand $dst,$src1,$src2\t! 
and vectors (32 bytes)" %} 8899 ins_encode %{ 8900 int vector_len = 1; 8901 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8902 %} 8903 ins_pipe( pipe_slow ); 8904 %} 8905 8906 instruct vand32B_mem(vecY dst, vecY src, memory mem) %{ 8907 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 8908 match(Set dst (AndV src (LoadVector mem))); 8909 format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %} 8910 ins_encode %{ 8911 int vector_len = 1; 8912 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8913 %} 8914 ins_pipe( pipe_slow ); 8915 %} 8916 8917 instruct vand64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ 8918 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); 8919 match(Set dst (AndV src1 src2)); 8920 format %{ "vpand $dst,$src1,$src2\t! and vectors (64 bytes)" %} 8921 ins_encode %{ 8922 int vector_len = 2; 8923 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8924 %} 8925 ins_pipe( pipe_slow ); 8926 %} 8927 8928 instruct vand64B_mem(vecZ dst, vecZ src, memory mem) %{ 8929 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); 8930 match(Set dst (AndV src (LoadVector mem))); 8931 format %{ "vpand $dst,$src,$mem\t! and vectors (64 bytes)" %} 8932 ins_encode %{ 8933 int vector_len = 2; 8934 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8935 %} 8936 ins_pipe( pipe_slow ); 8937 %} 8938 8939 // --------------------------------- OR --------------------------------------- 8940 8941 instruct vor4B(vecS dst, vecS src) %{ 8942 predicate(n->as_Vector()->length_in_bytes() == 4); 8943 match(Set dst (OrV dst src)); 8944 format %{ "por $dst,$src\t! or vectors (4 bytes)" %} 8945 ins_encode %{ 8946 __ por($dst$$XMMRegister, $src$$XMMRegister); 8947 %} 8948 ins_pipe( pipe_slow ); 8949 %} 8950 8951 instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{ 8952 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 8953 match(Set dst (OrV src1 src2)); 8954 format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %} 8955 ins_encode %{ 8956 int vector_len = 0; 8957 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8958 %} 8959 ins_pipe( pipe_slow ); 8960 %} 8961 8962 instruct vor4B_mem(vecS dst, vecS src, memory mem) %{ 8963 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 8964 match(Set dst (OrV src (LoadVector mem))); 8965 format %{ "vpor $dst,$src,$mem\t! or vectors (4 bytes)" %} 8966 ins_encode %{ 8967 int vector_len = 0; 8968 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8969 %} 8970 ins_pipe( pipe_slow ); 8971 %} 8972 8973 instruct vor8B(vecD dst, vecD src) %{ 8974 predicate(n->as_Vector()->length_in_bytes() == 8); 8975 match(Set dst (OrV dst src)); 8976 format %{ "por $dst,$src\t! or vectors (8 bytes)" %} 8977 ins_encode %{ 8978 __ por($dst$$XMMRegister, $src$$XMMRegister); 8979 %} 8980 ins_pipe( pipe_slow ); 8981 %} 8982 8983 instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{ 8984 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 8985 match(Set dst (OrV src1 src2)); 8986 format %{ "vpor $dst,$src1,$src2\t! 
or vectors (8 bytes)" %} 8987 ins_encode %{ 8988 int vector_len = 0; 8989 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8990 %} 8991 ins_pipe( pipe_slow ); 8992 %} 8993 8994 instruct vor8B_mem(vecD dst, vecD src, memory mem) %{ 8995 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 8996 match(Set dst (OrV src (LoadVector mem))); 8997 format %{ "vpor $dst,$src,$mem\t! or vectors (8 bytes)" %} 8998 ins_encode %{ 8999 int vector_len = 0; 9000 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9001 %} 9002 ins_pipe( pipe_slow ); 9003 %} 9004 9005 instruct vor16B(vecX dst, vecX src) %{ 9006 predicate(n->as_Vector()->length_in_bytes() == 16); 9007 match(Set dst (OrV dst src)); 9008 format %{ "por $dst,$src\t! or vectors (16 bytes)" %} 9009 ins_encode %{ 9010 __ por($dst$$XMMRegister, $src$$XMMRegister); 9011 %} 9012 ins_pipe( pipe_slow ); 9013 %} 9014 9015 instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{ 9016 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 9017 match(Set dst (OrV src1 src2)); 9018 format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %} 9019 ins_encode %{ 9020 int vector_len = 0; 9021 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 9022 %} 9023 ins_pipe( pipe_slow ); 9024 %} 9025 9026 instruct vor16B_mem(vecX dst, vecX src, memory mem) %{ 9027 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 9028 match(Set dst (OrV src (LoadVector mem))); 9029 format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %} 9030 ins_encode %{ 9031 int vector_len = 0; 9032 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9033 %} 9034 ins_pipe( pipe_slow ); 9035 %} 9036 9037 instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{ 9038 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 9039 match(Set dst (OrV src1 src2)); 9040 format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %} 9041 ins_encode %{ 9042 int vector_len = 1; 9043 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 9044 %} 9045 ins_pipe( pipe_slow ); 9046 %} 9047 9048 instruct vor32B_mem(vecY dst, vecY src, memory mem) %{ 9049 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 9050 match(Set dst (OrV src (LoadVector mem))); 9051 format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %} 9052 ins_encode %{ 9053 int vector_len = 1; 9054 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9055 %} 9056 ins_pipe( pipe_slow ); 9057 %} 9058 9059 instruct vor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ 9060 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); 9061 match(Set dst (OrV src1 src2)); 9062 format %{ "vpor $dst,$src1,$src2\t! or vectors (64 bytes)" %} 9063 ins_encode %{ 9064 int vector_len = 2; 9065 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 9066 %} 9067 ins_pipe( pipe_slow ); 9068 %} 9069 9070 instruct vor64B_mem(vecZ dst, vecZ src, memory mem) %{ 9071 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); 9072 match(Set dst (OrV src (LoadVector mem))); 9073 format %{ "vpor $dst,$src,$mem\t! 
or vectors (64 bytes)" %} 9074 ins_encode %{ 9075 int vector_len = 2; 9076 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9077 %} 9078 ins_pipe( pipe_slow ); 9079 %} 9080 9081 // --------------------------------- XOR -------------------------------------- 9082 9083 instruct vxor4B(vecS dst, vecS src) %{ 9084 predicate(n->as_Vector()->length_in_bytes() == 4); 9085 match(Set dst (XorV dst src)); 9086 format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %} 9087 ins_encode %{ 9088 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 9089 %} 9090 ins_pipe( pipe_slow ); 9091 %} 9092 9093 instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{ 9094 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 9095 match(Set dst (XorV src1 src2)); 9096 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %} 9097 ins_encode %{ 9098 int vector_len = 0; 9099 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 9100 %} 9101 ins_pipe( pipe_slow ); 9102 %} 9103 9104 instruct vxor4B_mem(vecS dst, vecS src, memory mem) %{ 9105 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 9106 match(Set dst (XorV src (LoadVector mem))); 9107 format %{ "vpxor $dst,$src,$mem\t! xor vectors (4 bytes)" %} 9108 ins_encode %{ 9109 int vector_len = 0; 9110 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9111 %} 9112 ins_pipe( pipe_slow ); 9113 %} 9114 9115 instruct vxor8B(vecD dst, vecD src) %{ 9116 predicate(n->as_Vector()->length_in_bytes() == 8); 9117 match(Set dst (XorV dst src)); 9118 format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %} 9119 ins_encode %{ 9120 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 9121 %} 9122 ins_pipe( pipe_slow ); 9123 %} 9124 9125 instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{ 9126 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 9127 match(Set dst (XorV src1 src2)); 9128 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %} 9129 ins_encode %{ 9130 int vector_len = 0; 9131 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 9132 %} 9133 ins_pipe( pipe_slow ); 9134 %} 9135 9136 instruct vxor8B_mem(vecD dst, vecD src, memory mem) %{ 9137 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 9138 match(Set dst (XorV src (LoadVector mem))); 9139 format %{ "vpxor $dst,$src,$mem\t! xor vectors (8 bytes)" %} 9140 ins_encode %{ 9141 int vector_len = 0; 9142 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9143 %} 9144 ins_pipe( pipe_slow ); 9145 %} 9146 9147 instruct vxor16B(vecX dst, vecX src) %{ 9148 predicate(n->as_Vector()->length_in_bytes() == 16); 9149 match(Set dst (XorV dst src)); 9150 format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %} 9151 ins_encode %{ 9152 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 9153 %} 9154 ins_pipe( pipe_slow ); 9155 %} 9156 9157 instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{ 9158 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 9159 match(Set dst (XorV src1 src2)); 9160 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %} 9161 ins_encode %{ 9162 int vector_len = 0; 9163 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 9164 %} 9165 ins_pipe( pipe_slow ); 9166 %} 9167 9168 instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{ 9169 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 9170 match(Set dst (XorV src (LoadVector mem))); 9171 format %{ "vpxor $dst,$src,$mem\t! 
xor vectors (16 bytes)" %} 9172 ins_encode %{ 9173 int vector_len = 0; 9174 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9175 %} 9176 ins_pipe( pipe_slow ); 9177 %} 9178 9179 instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{ 9180 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 9181 match(Set dst (XorV src1 src2)); 9182 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %} 9183 ins_encode %{ 9184 int vector_len = 1; 9185 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 9186 %} 9187 ins_pipe( pipe_slow ); 9188 %} 9189 9190 instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{ 9191 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 9192 match(Set dst (XorV src (LoadVector mem))); 9193 format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %} 9194 ins_encode %{ 9195 int vector_len = 1; 9196 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9197 %} 9198 ins_pipe( pipe_slow ); 9199 %} 9200 9201 instruct vxor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ 9202 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); 9203 match(Set dst (XorV src1 src2)); 9204 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (64 bytes)" %} 9205 ins_encode %{ 9206 int vector_len = 2; 9207 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 9208 %} 9209 ins_pipe( pipe_slow ); 9210 %} 9211 9212 instruct vxor64B_mem(vecZ dst, vecZ src, memory mem) %{ 9213 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); 9214 match(Set dst (XorV src (LoadVector mem))); 9215 format %{ "vpxor $dst,$src,$mem\t! xor vectors (64 bytes)" %} 9216 ins_encode %{ 9217 int vector_len = 2; 9218 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9219 %} 9220 ins_pipe( pipe_slow ); 9221 %} 9222