//
// Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def" name ( register save type, C convention save type,
//                  ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers. 512-bit registers or 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre EVEX enabled architectures:
//   XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX enabled architectures:
//   XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
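//
// As an illustrative reading (an editorial gloss on the definitions that
// follow, not an extra definition): the first line below,
//   reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
// declares word (a) of xmm0 as a save-on-call float slot with hardware
// encoding 0, and the ->next(n) forms (b)-(p) name its remaining
// 32-bit words.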
//
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM15 preserved across function calls
//              XMM0-XMM3 might hold parameters

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

#ifdef _LP64

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

#endif // _LP64

#ifdef _LP64
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
#else
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64

alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                   XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                   XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                   XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                   XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                   XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                   XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                   XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                  ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                   XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                  ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                   XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                   XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                   );

// flags allocation class should be last.
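// (A hedged note, inferred from the surrounding definitions rather than
// stated by them: alloc_class chunks appear to give the allocator's
// register ordering, which is presumably why RFLAGS gets its own
// trailing chunk after all the XMM words above.)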
alloc_class chunk2(RFLAGS);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);

// Class for pre evex float registers
reg_class float_reg_legacy(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15
#endif
                           );

// Class for evex float registers
reg_class float_reg_evex(XMM0,
                         XMM1,
                         XMM2,
                         XMM3,
                         XMM4,
                         XMM5,
                         XMM6,
                         XMM7
#ifdef _LP64
                        ,XMM8,
                         XMM9,
                         XMM10,
                         XMM11,
                         XMM12,
                         XMM13,
                         XMM14,
                         XMM15,
                         XMM16,
                         XMM17,
                         XMM18,
                         XMM19,
                         XMM20,
                         XMM21,
                         XMM22,
                         XMM23,
                         XMM24,
                         XMM25,
                         XMM26,
                         XMM27,
                         XMM28,
                         XMM29,
                         XMM30,
                         XMM31
#endif
                         );

reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );

// Class for pre evex double registers
reg_class double_reg_legacy(XMM0, XMM0b,
                            XMM1, XMM1b,
                            XMM2, XMM2b,
                            XMM3, XMM3b,
                            XMM4, XMM4b,
                            XMM5, XMM5b,
                            XMM6, XMM6b,
                            XMM7, XMM7b
#ifdef _LP64
                           ,XMM8, XMM8b,
                            XMM9, XMM9b,
                            XMM10, XMM10b,
                            XMM11, XMM11b,
                            XMM12, XMM12b,
                            XMM13, XMM13b,
                            XMM14, XMM14b,
                            XMM15, XMM15b
#endif
                            );

// Class for evex double registers
reg_class double_reg_evex(XMM0, XMM0b,
                          XMM1, XMM1b,
                          XMM2, XMM2b,
                          XMM3, XMM3b,
                          XMM4, XMM4b,
                          XMM5, XMM5b,
                          XMM6, XMM6b,
                          XMM7, XMM7b
#ifdef _LP64
                         ,XMM8, XMM8b,
                          XMM9, XMM9b,
                          XMM10, XMM10b,
                          XMM11, XMM11b,
                          XMM12, XMM12b,
                          XMM13, XMM13b,
                          XMM14, XMM14b,
                          XMM15, XMM15b,
                          XMM16, XMM16b,
                          XMM17, XMM17b,
                          XMM18, XMM18b,
                          XMM19, XMM19b,
                          XMM20, XMM20b,
                          XMM21, XMM21b,
                          XMM22, XMM22b,
                          XMM23, XMM23b,
                          XMM24, XMM24b,
                          XMM25, XMM25b,
                          XMM26, XMM26b,
                          XMM27, XMM27b,
                          XMM28, XMM28b,
                          XMM29, XMM29b,
                          XMM30, XMM30b,
                          XMM31, XMM31b
#endif
                          );

reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );

// Class for pre evex 32bit vector registers
reg_class vectors_reg_legacy(XMM0,
                             XMM1,
                             XMM2,
                             XMM3,
                             XMM4,
                             XMM5,
                             XMM6,
                             XMM7
#ifdef _LP64
                            ,XMM8,
                             XMM9,
                             XMM10,
                             XMM11,
                             XMM12,
                             XMM13,
                             XMM14,
                             XMM15
#endif
                             );

// Class for evex 32bit vector registers
reg_class vectors_reg_evex(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15,
                           XMM16,
                           XMM17,
                           XMM18,
                           XMM19,
                           XMM20,
                           XMM21,
                           XMM22,
                           XMM23,
                           XMM24,
                           XMM25,
                           XMM26,
                           XMM27,
                           XMM28,
                           XMM29,
                           XMM30,
                           XMM31
#endif
                           );

reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for all 64bit vector registers
reg_class vectord_reg_legacy(XMM0, XMM0b,
                             XMM1, XMM1b,
                             XMM2, XMM2b,
                             XMM3, XMM3b,
                             XMM4, XMM4b,
XMM5, XMM5b, 863 XMM6, XMM6b, 864 XMM7, XMM7b 865 #ifdef _LP64 866 ,XMM8, XMM8b, 867 XMM9, XMM9b, 868 XMM10, XMM10b, 869 XMM11, XMM11b, 870 XMM12, XMM12b, 871 XMM13, XMM13b, 872 XMM14, XMM14b, 873 XMM15, XMM15b 874 #endif 875 ); 876 877 // Class for all 64bit vector registers 878 reg_class vectord_reg_evex(XMM0, XMM0b, 879 XMM1, XMM1b, 880 XMM2, XMM2b, 881 XMM3, XMM3b, 882 XMM4, XMM4b, 883 XMM5, XMM5b, 884 XMM6, XMM6b, 885 XMM7, XMM7b 886 #ifdef _LP64 887 ,XMM8, XMM8b, 888 XMM9, XMM9b, 889 XMM10, XMM10b, 890 XMM11, XMM11b, 891 XMM12, XMM12b, 892 XMM13, XMM13b, 893 XMM14, XMM14b, 894 XMM15, XMM15b, 895 XMM16, XMM16b, 896 XMM17, XMM17b, 897 XMM18, XMM18b, 898 XMM19, XMM19b, 899 XMM20, XMM20b, 900 XMM21, XMM21b, 901 XMM22, XMM22b, 902 XMM23, XMM23b, 903 XMM24, XMM24b, 904 XMM25, XMM25b, 905 XMM26, XMM26b, 906 XMM27, XMM27b, 907 XMM28, XMM28b, 908 XMM29, XMM29b, 909 XMM30, XMM30b, 910 XMM31, XMM31b 911 #endif 912 ); 913 914 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} ); 915 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 916 917 // Class for all 128bit vector registers 918 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, 919 XMM1, XMM1b, XMM1c, XMM1d, 920 XMM2, XMM2b, XMM2c, XMM2d, 921 XMM3, XMM3b, XMM3c, XMM3d, 922 XMM4, XMM4b, XMM4c, XMM4d, 923 XMM5, XMM5b, XMM5c, XMM5d, 924 XMM6, XMM6b, XMM6c, XMM6d, 925 XMM7, XMM7b, XMM7c, XMM7d 926 #ifdef _LP64 927 ,XMM8, XMM8b, XMM8c, XMM8d, 928 XMM9, XMM9b, XMM9c, XMM9d, 929 XMM10, XMM10b, XMM10c, XMM10d, 930 XMM11, XMM11b, XMM11c, XMM11d, 931 XMM12, XMM12b, XMM12c, XMM12d, 932 XMM13, XMM13b, XMM13c, XMM13d, 933 XMM14, XMM14b, XMM14c, XMM14d, 934 XMM15, XMM15b, XMM15c, XMM15d 935 #endif 936 ); 937 938 // Class for all 128bit vector registers 939 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, 940 XMM1, XMM1b, XMM1c, XMM1d, 941 XMM2, XMM2b, XMM2c, XMM2d, 942 XMM3, XMM3b, XMM3c, XMM3d, 943 XMM4, XMM4b, XMM4c, XMM4d, 944 XMM5, XMM5b, XMM5c, XMM5d, 945 XMM6, XMM6b, XMM6c, XMM6d, 946 XMM7, XMM7b, XMM7c, XMM7d 947 #ifdef _LP64 948 ,XMM8, XMM8b, XMM8c, XMM8d, 949 XMM9, XMM9b, XMM9c, XMM9d, 950 XMM10, XMM10b, XMM10c, XMM10d, 951 XMM11, XMM11b, XMM11c, XMM11d, 952 XMM12, XMM12b, XMM12c, XMM12d, 953 XMM13, XMM13b, XMM13c, XMM13d, 954 XMM14, XMM14b, XMM14c, XMM14d, 955 XMM15, XMM15b, XMM15c, XMM15d, 956 XMM16, XMM16b, XMM16c, XMM16d, 957 XMM17, XMM17b, XMM17c, XMM17d, 958 XMM18, XMM18b, XMM18c, XMM18d, 959 XMM19, XMM19b, XMM19c, XMM19d, 960 XMM20, XMM20b, XMM20c, XMM20d, 961 XMM21, XMM21b, XMM21c, XMM21d, 962 XMM22, XMM22b, XMM22c, XMM22d, 963 XMM23, XMM23b, XMM23c, XMM23d, 964 XMM24, XMM24b, XMM24c, XMM24d, 965 XMM25, XMM25b, XMM25c, XMM25d, 966 XMM26, XMM26b, XMM26c, XMM26d, 967 XMM27, XMM27b, XMM27c, XMM27d, 968 XMM28, XMM28b, XMM28c, XMM28d, 969 XMM29, XMM29b, XMM29c, XMM29d, 970 XMM30, XMM30b, XMM30c, XMM30d, 971 XMM31, XMM31b, XMM31c, XMM31d 972 #endif 973 ); 974 975 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} ); 976 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 977 978 // Class for all 256bit vector registers 979 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 980 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 981 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 982 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 983 XMM4, XMM4b, 
XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                             XMM5,  XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                             XMM6,  XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                             XMM7,  XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
#ifdef _LP64
                            ,XMM8,  XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                             XMM9,  XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                             XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                             XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                             XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                             XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                             XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                             XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                             );

// Class for all 256bit vector registers
reg_class vectory_reg_evex(XMM0,  XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
                           XMM1,  XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
                           XMM2,  XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
                           XMM3,  XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
                           XMM4,  XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                           XMM5,  XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                           XMM6,  XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                           XMM7,  XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
#ifdef _LP64
                          ,XMM8,  XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                           XMM9,  XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
                           XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
                           XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
                           XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h
#endif
                           );

reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for all 512bit vector registers
reg_class vectorz_reg_evex(XMM0,  XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                           XMM1,  XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                           XMM2,  XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                           XMM3,  XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                           XMM4,  XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                           XMM5,  XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                           XMM6,  XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                           XMM7,  XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                          ,XMM8,  XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                           XMM9,  XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
                           XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                           XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                           XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                           );

// Class for restricted 512bit vector registers
reg_class vectorz_reg_legacy(XMM0,  XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                             XMM1,  XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                             XMM2,  XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                             XMM3,  XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                             XMM4,  XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                             XMM5,  XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                             XMM6,  XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                             XMM7,  XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                            ,XMM8,  XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                             XMM9,  XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                             XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                             XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                             XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                             XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                             XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                             XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
#endif
                             );

reg_class_dynamic vectorz_reg(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
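
// Note (illustrative): a reg_class_dynamic selects its first class when the
// trailing predicate holds and its second class otherwise.  For example,
// vectorz_reg resolves to vectorz_reg_evex (XMM0-XMM31 on LP64) when
// VM_Version::supports_evex() is true, and falls back to vectorz_reg_legacy
// (XMM0-XMM15) on pre-AVX512 hardware.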

reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
reg_class ymm0_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h);
reg_class zmm0_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p);

reg_class xmm1_reg(XMM1, XMM1b, XMM1c, XMM1d);
reg_class ymm1_reg(XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h);
reg_class zmm1_reg(XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p);

reg_class xmm2_reg(XMM2, XMM2b, XMM2c, XMM2d);
reg_class ymm2_reg(XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h);
reg_class zmm2_reg(XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p);

reg_class xmm3_reg(XMM3, XMM3b, XMM3c, XMM3d);
reg_class ymm3_reg(XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h);
reg_class zmm3_reg(XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p);

reg_class xmm4_reg(XMM4, XMM4b, XMM4c, XMM4d);
reg_class ymm4_reg(XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h);
reg_class zmm4_reg(XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p);

reg_class xmm5_reg(XMM5, XMM5b, XMM5c, XMM5d);
reg_class ymm5_reg(XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h);
reg_class zmm5_reg(XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p);

reg_class xmm6_reg(XMM6, XMM6b, XMM6c, XMM6d);
reg_class ymm6_reg(XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h);
reg_class zmm6_reg(XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p);

reg_class xmm7_reg(XMM7, XMM7b, XMM7c, XMM7d);
reg_class ymm7_reg(XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h);
reg_class zmm7_reg(XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p);

#ifdef _LP64

reg_class xmm8_reg(XMM8, XMM8b, XMM8c, XMM8d);
reg_class ymm8_reg(XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h);
reg_class zmm8_reg(XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p);

reg_class xmm9_reg(XMM9, XMM9b, XMM9c, XMM9d);
reg_class ymm9_reg(XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h);
reg_class zmm9_reg(XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p);

reg_class xmm10_reg(XMM10, XMM10b, XMM10c, XMM10d);
reg_class ymm10_reg(XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h);
reg_class zmm10_reg(XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p);

reg_class xmm11_reg(XMM11, XMM11b, XMM11c, XMM11d);
reg_class ymm11_reg(XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h);
reg_class zmm11_reg(XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p);

reg_class xmm12_reg(XMM12, XMM12b, XMM12c, XMM12d);
reg_class ymm12_reg(XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h);
reg_class zmm12_reg(XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p);

reg_class xmm13_reg(XMM13, XMM13b, XMM13c, XMM13d);
reg_class ymm13_reg(XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h);
reg_class zmm13_reg(XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p);

reg_class xmm14_reg(XMM14, XMM14b, XMM14c, XMM14d);
reg_class ymm14_reg(XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h);
reg_class zmm14_reg(XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p);

reg_class xmm15_reg(XMM15, XMM15b, XMM15c, XMM15d);
reg_class ymm15_reg(XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
reg_class zmm15_reg(XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);

reg_class xmm16_reg(XMM16, XMM16b, XMM16c, XMM16d);
reg_class ymm16_reg(XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h);
reg_class zmm16_reg(XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p);

reg_class xmm17_reg(XMM17, XMM17b, XMM17c, XMM17d);
reg_class ymm17_reg(XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h);
reg_class zmm17_reg(XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p);

reg_class xmm18_reg(XMM18, XMM18b, XMM18c, XMM18d);
reg_class ymm18_reg(XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h);
reg_class zmm18_reg(XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p);

reg_class xmm19_reg(XMM19, XMM19b, XMM19c, XMM19d);
reg_class ymm19_reg(XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h);
reg_class zmm19_reg(XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p);

reg_class xmm20_reg(XMM20, XMM20b, XMM20c, XMM20d);
reg_class ymm20_reg(XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h);
reg_class zmm20_reg(XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p);

reg_class xmm21_reg(XMM21, XMM21b, XMM21c, XMM21d);
reg_class ymm21_reg(XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h);
reg_class zmm21_reg(XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p);

reg_class xmm22_reg(XMM22, XMM22b, XMM22c, XMM22d);
reg_class ymm22_reg(XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h);
reg_class zmm22_reg(XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p);

reg_class xmm23_reg(XMM23, XMM23b, XMM23c, XMM23d);
reg_class ymm23_reg(XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h);
reg_class zmm23_reg(XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p);

reg_class xmm24_reg(XMM24, XMM24b, XMM24c, XMM24d);
reg_class ymm24_reg(XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h);
reg_class zmm24_reg(XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p);

reg_class xmm25_reg(XMM25, XMM25b, XMM25c, XMM25d);
reg_class ymm25_reg(XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h);
reg_class zmm25_reg(XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p);

reg_class xmm26_reg(XMM26, XMM26b, XMM26c, XMM26d);
reg_class ymm26_reg(XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h);
reg_class zmm26_reg(XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p);

reg_class xmm27_reg(XMM27, XMM27b, XMM27c, XMM27d);
reg_class ymm27_reg(XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h);
reg_class zmm27_reg(XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p);

reg_class xmm28_reg(XMM28, XMM28b, XMM28c, XMM28d);
reg_class ymm28_reg(XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h);
reg_class zmm28_reg(XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p);

reg_class xmm29_reg(XMM29, XMM29b, XMM29c, XMM29d);
reg_class ymm29_reg(XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h);
reg_class zmm29_reg(XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p);

reg_class xmm30_reg(XMM30, XMM30b, XMM30c, XMM30d);
reg_class ymm30_reg(XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h);
reg_class zmm30_reg(XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p);

reg_class xmm31_reg(XMM31, XMM31b, XMM31c, XMM31d);
reg_class ymm31_reg(XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
reg_class zmm31_reg(XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);

#endif

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description

source_hpp %{
// Header information of the source block.
// Method declarations/definitions which are used outside
// the ad-scope can conveniently be defined here.
//
// To keep related declarations/definitions/uses close together,
// we switch between source %{ }% and source_hpp %{ }% freely as needed.

class NativeJump;

class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};

class HandlerImpl {

 public:

  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  static uint size_exception_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization.  (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return NativeJump::instruction_size;
  }
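
  // (For reference, an illustrative note: on x86 NativeJump::instruction_size
  //  is 5 bytes, a jmp rel32, so the handler sizes returned here are small
  //  fixed byte counts reserved when the stub is created.)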

#ifdef _LP64
  static uint size_deopt_handler() {
    // three 5 byte instructions plus one move for unreachable address.
    return 15+3;
  }
#else
  static uint size_deopt_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization.  (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return 5 + NativeJump::instruction_size; // pushl(); jmp;
  }
#endif
};

%} // end source_hpp

source %{

#include "opto/addnode.hpp"

// Emit exception handler code.
// Stuff framesize into a register and call a VM stub routine.
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();
  __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}

// Emit deopt handler code.
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();

#ifdef _LP64
  address the_pc = (address) __ pc();
  Label next;
  // push a "the_pc" on the stack without destroying any registers
  // as they all may be live.

  // push address of "next"
  __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
  __ bind(next);
  // adjust it so it matches "the_pc"
  __ subptr(Address(rsp, 0), __ offset() - offset);
#else
  InternalAddress here(__ pc());
  __ pushptr(here.addr());
#endif

  __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
  __ end_a_stub();
  return offset;
}


//=============================================================================

// Float masks come from different places depending on platform.
#ifdef _LP64
static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
#else
static address float_signmask()  { return (address)float_signmask_pool; }
static address float_signflip()  { return (address)float_signflip_pool; }
static address double_signmask() { return (address)double_signmask_pool; }
static address double_signflip() { return (address)double_signflip_pool; }
#endif
static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
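
// Note (illustrative): float_signmask()/double_signmask() address broadcast
// constants of 0x7FFFFFFF / 0x7FFFFFFFFFFFFFFF, and the signflip variants
// 0x80000000 / 0x8000000000000000; the abs/neg instructs further down consume
// them via andps/andpd and xorps/xorpd.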

//=============================================================================
const bool Matcher::match_rule_supported(int opcode) {
  if (!has_match_rule(opcode))
    return false;

  bool ret_value = true;
  switch (opcode) {
    case Op_AbsVL:
      if (UseAVX < 3)
        ret_value = false;
      break; // do not fall into the PopCount check below
    case Op_PopCountI:
    case Op_PopCountL:
      if (!UsePopCountInstruction)
        ret_value = false;
      break;
    case Op_PopCountVI:
      if (!UsePopCountInstruction || !VM_Version::supports_vpopcntdq())
        ret_value = false;
      break;
    case Op_MulVI:
      if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
        ret_value = false;
      break;
    case Op_MulVL:
    case Op_MulReductionVL:
      if (VM_Version::supports_avx512dq() == false)
        ret_value = false;
      break;
    case Op_AddReductionVL:
      if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here
        ret_value = false;
      break;
    case Op_AbsVB:
    case Op_AbsVS:
    case Op_AbsVI:
    case Op_AddReductionVI:
      if (UseSSE < 3) // requires at least SSE3
        ret_value = false;
      break;
    case Op_MulReductionVI:
      if (UseSSE < 4) // requires at least SSE4
        ret_value = false;
      break;
    case Op_AddReductionVF:
    case Op_AddReductionVD:
    case Op_MulReductionVF:
    case Op_MulReductionVD:
      if (UseSSE < 1) // requires at least SSE
        ret_value = false;
      break;
    case Op_SqrtVD:
    case Op_SqrtVF:
      if (UseAVX < 1) // enabled for AVX only
        ret_value = false;
      break;
    case Op_CompareAndSwapL:
#ifdef _LP64
    case Op_CompareAndSwapP:
#endif
      if (!VM_Version::supports_cx8())
        ret_value = false;
      break;
    case Op_CMoveVF:
    case Op_CMoveVD:
      if (UseAVX < 1 || UseAVX > 2)
        ret_value = false;
      break;
    case Op_StrIndexOf:
      if (!UseSSE42Intrinsics)
        ret_value = false;
      break;
    case Op_StrIndexOfChar:
      if (!UseSSE42Intrinsics)
        ret_value = false;
      break;
    case Op_OnSpinWait:
      if (VM_Version::supports_on_spin_wait() == false)
        ret_value = false;
      break;
    case Op_MulAddVS2VI:
    case Op_RShiftVL:
    case Op_AbsVD:
    case Op_NegVD:
      if (UseSSE < 2)
        ret_value = false;
      break;
    case Op_MulVB:
    case Op_LShiftVB:
    case Op_RShiftVB:
    case Op_URShiftVB:
      if (UseSSE < 4)
        ret_value = false;
      break;
#ifdef _LP64
    case Op_MaxD:
    case Op_MaxF:
    case Op_MinD:
    case Op_MinF:
      if (UseAVX < 1) // enabled for AVX only
        ret_value = false;
      break;
#endif
  }

  return ret_value;  // By default match rules are supported.
}

const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
  // identify extra cases that we might want to provide match rules for
  // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
  bool ret_value = match_rule_supported(opcode);
  if (ret_value) {
    switch (opcode) {
      case Op_AbsVB:
      case Op_AddVB:
      case Op_SubVB:
        if ((vlen == 64) && (VM_Version::supports_avx512bw() == false))
          ret_value = false;
        break;
      case Op_AbsVS:
      case Op_AddVS:
      case Op_SubVS:
      case Op_MulVS:
      case Op_LShiftVS:
      case Op_RShiftVS:
      case Op_URShiftVS:
        if ((vlen == 32) && (VM_Version::supports_avx512bw() == false))
          ret_value = false;
        break;
      case Op_MulVB:
      case Op_LShiftVB:
      case Op_RShiftVB:
      case Op_URShiftVB:
        if ((vlen == 32 && UseAVX < 2) ||
            ((vlen == 64) && (VM_Version::supports_avx512bw() == false)))
          ret_value = false;
        break;
      case Op_NegVF:
        if ((vlen == 16) && (VM_Version::supports_avx512dq() == false))
          ret_value = false;
        break;
      case Op_CMoveVF:
        if (vlen != 8)
          ret_value = false;
        break;
      case Op_NegVD:
        if ((vlen == 8) && (VM_Version::supports_avx512dq() == false))
          ret_value = false;
        break;
      case Op_CMoveVD:
        if (vlen != 4)
          ret_value = false;
        break;
    }
  }

  return ret_value;  // By default match rules are supported.
}

const bool Matcher::has_predicated_vectors(void) {
  bool ret_value = false;
  if (UseAVX > 2) {
    ret_value = VM_Version::supports_avx512vl();
  }

  return ret_value;
}

const int Matcher::float_pressure(int default_pressure_threshold) {
  int float_pressure_threshold = default_pressure_threshold;
#ifdef _LP64
  if (UseAVX > 2) {
    // Increase pressure threshold on machines with AVX3 which have
    // 2x more XMM registers.
    float_pressure_threshold = default_pressure_threshold * 2;
  }
#endif
  return float_pressure_threshold;
}

// Max vector size in bytes. 0 if not supported.
const int Matcher::vector_width_in_bytes(BasicType bt) {
  assert(is_java_primitive(bt), "only primitive type vectors");
  if (UseSSE < 2) return 0;
  // SSE2 supports 128bit vectors for all types.
  // AVX2 supports 256bit vectors for all types.
  // AVX512 (EVEX) supports 512bit vectors for all types.
  int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
  // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
  if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
    size = (UseAVX > 2) ? 64 : 32;
  if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
    size = (VM_Version::supports_avx512bw()) ? 64 : 32;
  // Use flag to limit vector size.
  size = MIN2(size,(int)MaxVectorSize);
  // Minimum 2 values in vector (or 4 for bytes).
  switch (bt) {
  case T_DOUBLE:
  case T_LONG:
    if (size < 16) return 0;
    break;
  case T_FLOAT:
  case T_INT:
    if (size < 8) return 0;
    break;
  case T_BOOLEAN:
    if (size < 4) return 0;
    break;
  case T_CHAR:
    if (size < 4) return 0;
    break;
  case T_BYTE:
    if (size < 4) return 0;
    break;
  case T_SHORT:
    if (size < 4) return 0;
    break;
  default:
    ShouldNotReachHere();
  }
  return size;
}

// Limits on vector size (number of elements) loaded into vector.
const int Matcher::max_vector_size(const BasicType bt) {
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}
const int Matcher::min_vector_size(const BasicType bt) {
  int max_size = max_vector_size(bt);
  // Min size which can be loaded into vector is 4 bytes.
  int size = (type2aelembytes(bt) == 1) ? 4 : 2;
  return MIN2(size,max_size);
}

// Vector ideal reg corresponding to specified size in bytes
const uint Matcher::vector_ideal_reg(int size) {
  assert(MaxVectorSize >= size, "");
  switch(size) {
    case  4: return Op_VecS;
    case  8: return Op_VecD;
    case 16: return Op_VecX;
    case 32: return Op_VecY;
    case 64: return Op_VecZ;
  }
  ShouldNotReachHere();
  return 0;
}

// Only lowest bits of xmm reg are used for vector shift count.
const uint Matcher::vector_shift_count_ideal_reg(int size) {
  return Op_VecS;
}

// x86 supports misaligned vectors store/load.
const bool Matcher::misaligned_vectors_ok() {
  return true;
}

// x86 AES instructions are compatible with SunJCE expanded
// keys, hence we do not need to pass the original key to stubs
const bool Matcher::pass_original_key_for_aes() {
  return false;
}


const bool Matcher::convi2l_type_required = true;

// Check for shift by small constant as well
static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
  if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
      shift->in(2)->get_int() <= 3 &&
      // Are there other uses besides address expressions?
      !matcher->is_visited(shift)) {
    address_visited.set(shift->_idx); // Flag as address_visited
    mstack.push(shift->in(2), Matcher::Visit);
    Node *conv = shift->in(1);
#ifdef _LP64
    // Allow Matcher to match the rule which bypasses
    // ConvI2L operation for an array index on LP64
    // if the index value is positive.
    if (conv->Opcode() == Op_ConvI2L &&
        conv->as_Type()->type()->is_long()->_lo >= 0 &&
        // Are there other uses besides address expressions?
        !matcher->is_visited(conv)) {
      address_visited.set(conv->_idx); // Flag as address_visited
      mstack.push(conv->in(1), Matcher::Pre_Visit);
    } else
#endif
      mstack.push(conv, Matcher::Pre_Visit);
    return true;
  }
  return false;
}

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?
bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
  Node *off = m->in(AddPNode::Offset);
  if (off->is_Con()) {
    address_visited.test_set(m->_idx); // Flag as address_visited
    Node *adr = m->in(AddPNode::Address);

    // Intel can handle 2 adds in addressing mode
    // AtomicAdd is not an addressing expression.
    // Cheap to find it by looking for screwy base.
    if (adr->is_AddP() &&
        !adr->in(AddPNode::Base)->is_top() &&
        LP64_ONLY( off->get_long() == (int) (off->get_long()) && ) // immL32
        // Are there other uses besides address expressions?
        !is_visited(adr)) {
      address_visited.set(adr->_idx); // Flag as address_visited
      Node *shift = adr->in(AddPNode::Offset);
      if (!clone_shift(shift, this, mstack, address_visited)) {
        mstack.push(shift, Pre_Visit);
      }
      mstack.push(adr->in(AddPNode::Address), Pre_Visit);
      mstack.push(adr->in(AddPNode::Base), Pre_Visit);
    } else {
      mstack.push(adr, Pre_Visit);
    }

    // Clone X+offset as it also folds into most addressing expressions
    mstack.push(off, Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  } else if (clone_shift(off, this, mstack, address_visited)) {
    address_visited.test_set(m->_idx); // Flag as address_visited
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  }
  return false;
}

void Compile::reshape_address(AddPNode* addp) {
}
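
// Example (illustrative): for an array access like a[i + 4] the address tree
// is roughly AddP(base, AddP(base, base, LShiftX(ConvI2L(i), #2)), #offset).
// Cloning the small-constant shift (scale <= 3) and the constant offset lets
// the matcher subsume the whole tree into a single x86 addressing mode,
// e.g. [base + i*4 + offset], instead of computing it into a register.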

// Helper methods for MachSpillCopyNode::implementation().
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st) {
  // In 64-bit VM size calculation is very complex. Emitting instructions
  // into scratch buffer is used to get size in 64-bit VM.
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
  assert(ireg == Op_VecS || // 32bit vector
         (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
         (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
         "no non-adjacent vector moves" );
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    switch (ireg) {
    case Op_VecS: // copy whole register
    case Op_VecD:
    case Op_VecX:
#ifndef _LP64
      __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
#else
      if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
        __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      } else {
        __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
      }
#endif
      break;
    case Op_VecY:
#ifndef _LP64
      __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
#else
      if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
        __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      } else {
        __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
      }
#endif
      break;
    case Op_VecZ:
      __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
      break;
    default:
      ShouldNotReachHere();
    }
    int size = __ offset() - offset;
#ifdef ASSERT
    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    assert(!do_size || size == 4, "incorrect size calculation");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    switch (ireg) {
    case Op_VecS:
    case Op_VecD:
    case Op_VecX:
      st->print("movdqu  %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    case Op_VecY:
    case Op_VecZ:
      st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  return (UseAVX > 2) ? 6 : 4;
}

static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
                            int stack_offset, int reg, uint ireg, outputStream* st) {
  // In 64-bit VM size calculation is very complex. Emitting instructions
  // into scratch buffer is used to get size in 64-bit VM.
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecD:
        __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecX:
#ifndef _LP64
        __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
#else
        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
          __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        } else {
          __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
          __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 0x0);
        }
#endif
        break;
      case Op_VecY:
#ifndef _LP64
        __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
#else
        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
          __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        } else {
          __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
          __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 0x0);
        }
#endif
        break;
      case Op_VecZ:
        __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecD:
        __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecX:
#ifndef _LP64
        __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
#else
        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
          __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        }
        else {
          __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
        }
#endif
        break;
      case Op_VecY:
#ifndef _LP64
        __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
#else
        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
          __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        }
        else {
          __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
        }
#endif
        break;
      case Op_VecZ:
        __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
        break;
      default:
        ShouldNotReachHere();
      }
    }
    int size = __ offset() - offset;
#ifdef ASSERT
    int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4);
    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    assert(!do_size || size == (5+offset_size), "incorrect size calculation");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecD:
        st->print("movq    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecX:
        st->print("movdqu  %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecY:
      case Op_VecZ:
        st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        st->print("movd    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecD:
        st->print("movq    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecX:
        st->print("movdqu  [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecY:
      case Op_VecZ:
        st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      default:
        ShouldNotReachHere();
      }
    }
#endif
  }
  bool is_single_byte = false;
  int vec_len = 0;
  if ((UseAVX > 2) && (stack_offset != 0)) {
    int tuple_type = Assembler::EVEX_FVM;
    int input_size = Assembler::EVEX_32bit;
    switch (ireg) {
    case Op_VecS:
      tuple_type = Assembler::EVEX_T1S;
      break;
    case Op_VecD:
      tuple_type = Assembler::EVEX_T1S;
      input_size = Assembler::EVEX_64bit;
      break;
    case Op_VecX:
      break;
    case Op_VecY:
      vec_len = 1;
      break;
    case Op_VecZ:
      vec_len = 2;
      break;
    }
    is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0);
  }
  int offset_size = 0;
  int size = 5;
  if (UseAVX > 2 ) {
    if (VM_Version::supports_avx512novl() && (vec_len == 2)) {
      offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
      size += 2; // Need an additional two bytes for EVEX encoding
    } else if (VM_Version::supports_avx512novl() && (vec_len < 2)) {
      offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
    } else {
      offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
      size += 2; // Need an additional two bytes for EVEX encoding
    }
  } else {
    offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
  }
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+offset_size;
}

static inline jint replicate4_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 32bit.
  assert(width == 1 || width == 2, "only byte or short types here");
  int bit_width = width * 8;
  jint val = con;
  val &= (1 << bit_width) - 1;  // mask off sign bits
  while(bit_width < 32) {
    val |= (val << bit_width);
    bit_width <<= 1;
  }
  return val;
}

static inline jlong replicate8_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
  assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
  int bit_width = width * 8;
  jlong val = con;
  val &= (((jlong) 1) << bit_width) - 1;  // mask off sign bits
  while(bit_width < 64) {
    val |= (val << bit_width);
    bit_width <<= 1;
  }
  return val;
}
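
// Worked example (illustrative): replicate4_imm(0x8F, 1) masks the constant
// to its low byte and doubles the pattern until 32 bits are filled:
//   0x8F -> 0x8F8F -> 0x8F8F8F8F
// Likewise replicate8_imm(-1, 2) masks the constant to 0xFFFF and widens it
// to 0xFFFFFFFFFFFFFFFF.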

#ifndef PRODUCT
void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("nop \t# %d bytes pad for loops and calls", _count);
}
#endif

void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
  MacroAssembler _masm(&cbuf);
  __ nop(_count);
}

uint MachNopNode::size(PhaseRegAlloc*) const {
  return _count;
}

#ifndef PRODUCT
void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("# breakpoint");
}
#endif

void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const {
  MacroAssembler _masm(&cbuf);
  __ int3();
}

uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
  return MachNode::size(ra_);
}

%}

encode %{

  enc_class call_epilog %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      MacroAssembler _masm(&cbuf);
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
  %}

%}


//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

operand vecZ() %{
  constraint(ALLOC_IN_RC(vectorz_reg));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}

operand legVecZ() %{
  constraint(ALLOC_IN_RC(vectorz_reg_vl));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}

// Comparison Code for FP conditional move
operand cmpOp_vcmppd() %{
  match(Bool);

  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0, "eq");
    less         (0x1, "lt");
    less_equal   (0x2, "le");
    not_equal    (0xC, "ne");
    greater_equal(0xD, "ge");
    greater      (0xE, "gt");
    //TODO cannot compile (adlc breaks) without two next lines with error:
    // x86_64.ad(13987) Syntax Error: :In operand cmpOp_vcmppd: Do not support this encode constant: ' %{
    // equal' for overflow.
    overflow     (0x20, "o");  // not really supported by the instruction
    no_overflow  (0x21, "no"); // not really supported by the instruction
  %}
%}
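
// Note (illustrative): the encodings above correspond to the AVX compare
// predicate immediates consumed by vcmppd (0x0 = EQ_OQ, 0x1 = LT_OS,
// 0x2 = LE_OS, 0xC = NEQ_OQ, 0xD = GE_OS, 0xE = GT_OS), so the condition
// code can be emitted directly as the instruction's imm8 field.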

// INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)

// ============================================================================

instruct ShouldNotReachHere() %{
  match(Halt);
  format %{ "ud2\t# ShouldNotReachHere" %}
  ins_encode %{
    __ ud2();
  %}
  ins_pipe(pipe_slow);
%}

// =================================EVEX special===============================

instruct setMask(rRegI dst, rRegI src) %{
  predicate(Matcher::has_predicated_vectors());
  match(Set dst (SetVectMaskI src));
  effect(TEMP dst);
  format %{ "setvectmask   $dst, $src" %}
  ins_encode %{
    __ setvectmask($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// ============================================================================
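
// Note (illustrative): each scalar FP operation below comes in two flavors.
// The SSE forms (predicate (UseSSE>=1/2) && (UseAVX == 0)) are two-address:
// "addss dst, src" overwrites dst, hence match(Set dst (AddF dst src)).
// The AVX forms (predicate UseAVX > 0) use the three-address VEX encoding,
// e.g. "vaddss dst, src1, src2", so the destination need not alias an input.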
format %{ "addsd $dst, $src" %} 2201 ins_cost(150); 2202 ins_encode %{ 2203 __ addsd($dst$$XMMRegister, $src$$Address); 2204 %} 2205 ins_pipe(pipe_slow); 2206 %} 2207 2208 instruct addD_imm(regD dst, immD con) %{ 2209 predicate((UseSSE>=2) && (UseAVX == 0)); 2210 match(Set dst (AddD dst con)); 2211 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2212 ins_cost(150); 2213 ins_encode %{ 2214 __ addsd($dst$$XMMRegister, $constantaddress($con)); 2215 %} 2216 ins_pipe(pipe_slow); 2217 %} 2218 2219 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 2220 predicate(UseAVX > 0); 2221 match(Set dst (AddD src1 src2)); 2222 2223 format %{ "vaddsd $dst, $src1, $src2" %} 2224 ins_cost(150); 2225 ins_encode %{ 2226 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2227 %} 2228 ins_pipe(pipe_slow); 2229 %} 2230 2231 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 2232 predicate(UseAVX > 0); 2233 match(Set dst (AddD src1 (LoadD src2))); 2234 2235 format %{ "vaddsd $dst, $src1, $src2" %} 2236 ins_cost(150); 2237 ins_encode %{ 2238 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2239 %} 2240 ins_pipe(pipe_slow); 2241 %} 2242 2243 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 2244 predicate(UseAVX > 0); 2245 match(Set dst (AddD src con)); 2246 2247 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2248 ins_cost(150); 2249 ins_encode %{ 2250 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2251 %} 2252 ins_pipe(pipe_slow); 2253 %} 2254 2255 instruct subF_reg(regF dst, regF src) %{ 2256 predicate((UseSSE>=1) && (UseAVX == 0)); 2257 match(Set dst (SubF dst src)); 2258 2259 format %{ "subss $dst, $src" %} 2260 ins_cost(150); 2261 ins_encode %{ 2262 __ subss($dst$$XMMRegister, $src$$XMMRegister); 2263 %} 2264 ins_pipe(pipe_slow); 2265 %} 2266 2267 instruct subF_mem(regF dst, memory src) %{ 2268 predicate((UseSSE>=1) && (UseAVX == 0)); 2269 match(Set dst (SubF dst (LoadF src))); 2270 2271 format %{ "subss $dst, $src" %} 2272 ins_cost(150); 2273 ins_encode %{ 2274 __ subss($dst$$XMMRegister, $src$$Address); 2275 %} 2276 ins_pipe(pipe_slow); 2277 %} 2278 2279 instruct subF_imm(regF dst, immF con) %{ 2280 predicate((UseSSE>=1) && (UseAVX == 0)); 2281 match(Set dst (SubF dst con)); 2282 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2283 ins_cost(150); 2284 ins_encode %{ 2285 __ subss($dst$$XMMRegister, $constantaddress($con)); 2286 %} 2287 ins_pipe(pipe_slow); 2288 %} 2289 2290 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 2291 predicate(UseAVX > 0); 2292 match(Set dst (SubF src1 src2)); 2293 2294 format %{ "vsubss $dst, $src1, $src2" %} 2295 ins_cost(150); 2296 ins_encode %{ 2297 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2298 %} 2299 ins_pipe(pipe_slow); 2300 %} 2301 2302 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 2303 predicate(UseAVX > 0); 2304 match(Set dst (SubF src1 (LoadF src2))); 2305 2306 format %{ "vsubss $dst, $src1, $src2" %} 2307 ins_cost(150); 2308 ins_encode %{ 2309 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2310 %} 2311 ins_pipe(pipe_slow); 2312 %} 2313 2314 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 2315 predicate(UseAVX > 0); 2316 match(Set dst (SubF src con)); 2317 2318 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2319 ins_cost(150); 2320 ins_encode %{ 2321 
__ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2322 %} 2323 ins_pipe(pipe_slow); 2324 %} 2325 2326 instruct subD_reg(regD dst, regD src) %{ 2327 predicate((UseSSE>=2) && (UseAVX == 0)); 2328 match(Set dst (SubD dst src)); 2329 2330 format %{ "subsd $dst, $src" %} 2331 ins_cost(150); 2332 ins_encode %{ 2333 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 2334 %} 2335 ins_pipe(pipe_slow); 2336 %} 2337 2338 instruct subD_mem(regD dst, memory src) %{ 2339 predicate((UseSSE>=2) && (UseAVX == 0)); 2340 match(Set dst (SubD dst (LoadD src))); 2341 2342 format %{ "subsd $dst, $src" %} 2343 ins_cost(150); 2344 ins_encode %{ 2345 __ subsd($dst$$XMMRegister, $src$$Address); 2346 %} 2347 ins_pipe(pipe_slow); 2348 %} 2349 2350 instruct subD_imm(regD dst, immD con) %{ 2351 predicate((UseSSE>=2) && (UseAVX == 0)); 2352 match(Set dst (SubD dst con)); 2353 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2354 ins_cost(150); 2355 ins_encode %{ 2356 __ subsd($dst$$XMMRegister, $constantaddress($con)); 2357 %} 2358 ins_pipe(pipe_slow); 2359 %} 2360 2361 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 2362 predicate(UseAVX > 0); 2363 match(Set dst (SubD src1 src2)); 2364 2365 format %{ "vsubsd $dst, $src1, $src2" %} 2366 ins_cost(150); 2367 ins_encode %{ 2368 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2369 %} 2370 ins_pipe(pipe_slow); 2371 %} 2372 2373 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 2374 predicate(UseAVX > 0); 2375 match(Set dst (SubD src1 (LoadD src2))); 2376 2377 format %{ "vsubsd $dst, $src1, $src2" %} 2378 ins_cost(150); 2379 ins_encode %{ 2380 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2381 %} 2382 ins_pipe(pipe_slow); 2383 %} 2384 2385 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 2386 predicate(UseAVX > 0); 2387 match(Set dst (SubD src con)); 2388 2389 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2390 ins_cost(150); 2391 ins_encode %{ 2392 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2393 %} 2394 ins_pipe(pipe_slow); 2395 %} 2396 2397 instruct mulF_reg(regF dst, regF src) %{ 2398 predicate((UseSSE>=1) && (UseAVX == 0)); 2399 match(Set dst (MulF dst src)); 2400 2401 format %{ "mulss $dst, $src" %} 2402 ins_cost(150); 2403 ins_encode %{ 2404 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 2405 %} 2406 ins_pipe(pipe_slow); 2407 %} 2408 2409 instruct mulF_mem(regF dst, memory src) %{ 2410 predicate((UseSSE>=1) && (UseAVX == 0)); 2411 match(Set dst (MulF dst (LoadF src))); 2412 2413 format %{ "mulss $dst, $src" %} 2414 ins_cost(150); 2415 ins_encode %{ 2416 __ mulss($dst$$XMMRegister, $src$$Address); 2417 %} 2418 ins_pipe(pipe_slow); 2419 %} 2420 2421 instruct mulF_imm(regF dst, immF con) %{ 2422 predicate((UseSSE>=1) && (UseAVX == 0)); 2423 match(Set dst (MulF dst con)); 2424 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2425 ins_cost(150); 2426 ins_encode %{ 2427 __ mulss($dst$$XMMRegister, $constantaddress($con)); 2428 %} 2429 ins_pipe(pipe_slow); 2430 %} 2431 2432 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 2433 predicate(UseAVX > 0); 2434 match(Set dst (MulF src1 src2)); 2435 2436 format %{ "vmulss $dst, $src1, $src2" %} 2437 ins_cost(150); 2438 ins_encode %{ 2439 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2440 %} 2441 ins_pipe(pipe_slow); 2442 %} 2443 2444 instruct mulF_reg_mem(regF dst, regF 
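
// Note (illustrative): the *_imm forms above and below read their constant
// operand straight from the compiled method's constant table via
// $constantaddress($con), so the float/double literal never has to be
// materialized in a general-purpose register first.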

instruct mulF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst src));

  format %{ "mulss   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst (LoadF src)));

  format %{ "mulss   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst con));
  format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 src2));

  format %{ "vmulss  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "vmulss  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src con));

  format %{ "vmulss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst src));

  format %{ "mulsd   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst (LoadD src)));

  format %{ "mulsd   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst con));
  format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 src2));

  format %{ "vmulsd  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 (LoadD src2)));

  format %{ "vmulsd  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src con));

  format %{ "vmulsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst src));

  format %{ "divss   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst (LoadF src)));

  format %{ "divss   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst con));
  format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 src2));

  format %{ "vdivss  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 (LoadF src2)));

  format %{ "vdivss  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src con));

  format %{ "vdivss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst src));

  format %{ "divsd   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst (LoadD src)));

  format %{ "divsd   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst con));
  format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 src2));

  format %{ "vdivsd  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 (LoadD src2)));

  format %{ "vdivsd  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src con));

  format %{ "vdivsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
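
// Note (illustrative): the abs/neg instructs below are pure bit operations
// on the IEEE-754 encoding: and-ing a float with 0x7FFFFFFF clears the sign
// bit (abs), while xor-ing with 0x80000000 flips it (neg); the double
// variants use the corresponding 64-bit masks.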
%} 2686 ins_encode %{ 2687 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 2688 %} 2689 ins_pipe(pipe_slow); 2690 %} 2691 2692 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{ 2693 predicate(UseAVX > 0); 2694 match(Set dst (AbsF src)); 2695 ins_cost(150); 2696 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 2697 ins_encode %{ 2698 int vector_len = 0; 2699 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 2700 ExternalAddress(float_signmask()), vector_len); 2701 %} 2702 ins_pipe(pipe_slow); 2703 %} 2704 2705 instruct absD_reg(regD dst) %{ 2706 predicate((UseSSE>=2) && (UseAVX == 0)); 2707 match(Set dst (AbsD dst)); 2708 ins_cost(150); 2709 format %{ "andpd $dst, [0x7fffffffffffffff]\t" 2710 "# abs double by sign masking" %} 2711 ins_encode %{ 2712 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); 2713 %} 2714 ins_pipe(pipe_slow); 2715 %} 2716 2717 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{ 2718 predicate(UseAVX > 0); 2719 match(Set dst (AbsD src)); 2720 ins_cost(150); 2721 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 2722 "# abs double by sign masking" %} 2723 ins_encode %{ 2724 int vector_len = 0; 2725 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 2726 ExternalAddress(double_signmask()), vector_len); 2727 %} 2728 ins_pipe(pipe_slow); 2729 %} 2730 2731 instruct negF_reg(regF dst) %{ 2732 predicate((UseSSE>=1) && (UseAVX == 0)); 2733 match(Set dst (NegF dst)); 2734 ins_cost(150); 2735 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 2736 ins_encode %{ 2737 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 2738 %} 2739 ins_pipe(pipe_slow); 2740 %} 2741 2742 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{ 2743 predicate(UseAVX > 0); 2744 match(Set dst (NegF src)); 2745 ins_cost(150); 2746 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 2747 ins_encode %{ 2748 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister, 2749 ExternalAddress(float_signflip())); 2750 %} 2751 ins_pipe(pipe_slow); 2752 %} 2753 2754 instruct negD_reg(regD dst) %{ 2755 predicate((UseSSE>=2) && (UseAVX == 0)); 2756 match(Set dst (NegD dst)); 2757 ins_cost(150); 2758 format %{ "xorpd $dst, [0x8000000000000000]\t" 2759 "# neg double by sign flipping" %} 2760 ins_encode %{ 2761 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 2762 %} 2763 ins_pipe(pipe_slow); 2764 %} 2765 2766 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{ 2767 predicate(UseAVX > 0); 2768 match(Set dst (NegD src)); 2769 ins_cost(150); 2770 format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t" 2771 "# neg double by sign flipping" %} 2772 ins_encode %{ 2773 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister, 2774 ExternalAddress(double_signflip())); 2775 %} 2776 ins_pipe(pipe_slow); 2777 %} 2778 2779 instruct sqrtF_reg(regF dst, regF src) %{ 2780 predicate(UseSSE>=1); 2781 match(Set dst (SqrtF src)); 2782 2783 format %{ "sqrtss $dst, $src" %} 2784 ins_cost(150); 2785 ins_encode %{ 2786 __ sqrtss($dst$$XMMRegister, $src$$XMMRegister); 2787 %} 2788 ins_pipe(pipe_slow); 2789 %} 2790 2791 instruct sqrtF_mem(regF dst, memory src) %{ 2792 predicate(UseSSE>=1); 2793 match(Set dst (SqrtF (LoadF src))); 2794 2795 format %{ "sqrtss $dst, $src" %} 2796 ins_cost(150); 2797 ins_encode %{ 2798 __ sqrtss($dst$$XMMRegister, $src$$Address); 2799 %} 2800 ins_pipe(pipe_slow); 2801 %} 2802 2803 instruct sqrtF_imm(regF dst, immF con) %{ 2804 predicate(UseSSE>=1); 2805 match(Set dst (SqrtF con)); 2806 2807 format %{ "sqrtss 
$dst, [$constantaddress]\t# load from constant table: float=$con" %} 2808 ins_cost(150); 2809 ins_encode %{ 2810 __ sqrtss($dst$$XMMRegister, $constantaddress($con)); 2811 %} 2812 ins_pipe(pipe_slow); 2813 %} 2814 2815 instruct sqrtD_reg(regD dst, regD src) %{ 2816 predicate(UseSSE>=2); 2817 match(Set dst (SqrtD src)); 2818 2819 format %{ "sqrtsd $dst, $src" %} 2820 ins_cost(150); 2821 ins_encode %{ 2822 __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister); 2823 %} 2824 ins_pipe(pipe_slow); 2825 %} 2826 2827 instruct sqrtD_mem(regD dst, memory src) %{ 2828 predicate(UseSSE>=2); 2829 match(Set dst (SqrtD (LoadD src))); 2830 2831 format %{ "sqrtsd $dst, $src" %} 2832 ins_cost(150); 2833 ins_encode %{ 2834 __ sqrtsd($dst$$XMMRegister, $src$$Address); 2835 %} 2836 ins_pipe(pipe_slow); 2837 %} 2838 2839 instruct sqrtD_imm(regD dst, immD con) %{ 2840 predicate(UseSSE>=2); 2841 match(Set dst (SqrtD con)); 2842 format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2843 ins_cost(150); 2844 ins_encode %{ 2845 __ sqrtsd($dst$$XMMRegister, $constantaddress($con)); 2846 %} 2847 ins_pipe(pipe_slow); 2848 %} 2849 2850 instruct onspinwait() %{ 2851 match(OnSpinWait); 2852 ins_cost(200); 2853 2854 format %{ 2855 $$template 2856 $$emit$$"pause\t! membar_onspinwait" 2857 %} 2858 ins_encode %{ 2859 __ pause(); 2860 %} 2861 ins_pipe(pipe_slow); 2862 %} 2863 2864 // a * b + c 2865 instruct fmaD_reg(regD a, regD b, regD c) %{ 2866 predicate(UseFMA); 2867 match(Set c (FmaD c (Binary a b))); 2868 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %} 2869 ins_cost(150); 2870 ins_encode %{ 2871 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 2872 %} 2873 ins_pipe( pipe_slow ); 2874 %} 2875 2876 // a * b + c 2877 instruct fmaF_reg(regF a, regF b, regF c) %{ 2878 predicate(UseFMA); 2879 match(Set c (FmaF c (Binary a b))); 2880 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %} 2881 ins_cost(150); 2882 ins_encode %{ 2883 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 2884 %} 2885 ins_pipe( pipe_slow ); 2886 %} 2887 2888 // ====================VECTOR INSTRUCTIONS===================================== 2889 2890 2891 // Load vectors (4 bytes long) 2892 instruct loadV4(vecS dst, memory mem) %{ 2893 predicate(n->as_LoadVector()->memory_size() == 4); 2894 match(Set dst (LoadVector mem)); 2895 ins_cost(125); 2896 format %{ "movd $dst,$mem\t! load vector (4 bytes)" %} 2897 ins_encode %{ 2898 __ movdl($dst$$XMMRegister, $mem$$Address); 2899 %} 2900 ins_pipe( pipe_slow ); 2901 %} 2902 2903 // Load vectors (4 bytes long) 2904 instruct MoveVecS2Leg(legVecS dst, vecS src) %{ 2905 match(Set dst src); 2906 format %{ "movss $dst,$src\t! load vector (4 bytes)" %} 2907 ins_encode %{ 2908 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 2909 %} 2910 ins_pipe( fpu_reg_reg ); 2911 %} 2912 2913 // Load vectors (4 bytes long) 2914 instruct MoveLeg2VecS(vecS dst, legVecS src) %{ 2915 match(Set dst src); 2916 format %{ "movss $dst,$src\t! load vector (4 bytes)" %} 2917 ins_encode %{ 2918 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 2919 %} 2920 ins_pipe( fpu_reg_reg ); 2921 %} 2922 2923 // Load vectors (8 bytes long) 2924 instruct loadV8(vecD dst, memory mem) %{ 2925 predicate(n->as_LoadVector()->memory_size() == 8); 2926 match(Set dst (LoadVector mem)); 2927 ins_cost(125); 2928 format %{ "movq $dst,$mem\t! 
load vector (8 bytes)" %} 2929 ins_encode %{ 2930 __ movq($dst$$XMMRegister, $mem$$Address); 2931 %} 2932 ins_pipe( pipe_slow ); 2933 %} 2934 2935 // Load vectors (8 bytes long) 2936 instruct MoveVecD2Leg(legVecD dst, vecD src) %{ 2937 match(Set dst src); 2938 format %{ "movsd $dst,$src\t! load vector (8 bytes)" %} 2939 ins_encode %{ 2940 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 2941 %} 2942 ins_pipe( fpu_reg_reg ); 2943 %} 2944 2945 // Load vectors (8 bytes long) 2946 instruct MoveLeg2VecD(vecD dst, legVecD src) %{ 2947 match(Set dst src); 2948 format %{ "movsd $dst,$src\t! load vector (8 bytes)" %} 2949 ins_encode %{ 2950 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 2951 %} 2952 ins_pipe( fpu_reg_reg ); 2953 %} 2954 2955 // Load vectors (16 bytes long) 2956 instruct loadV16(vecX dst, memory mem) %{ 2957 predicate(n->as_LoadVector()->memory_size() == 16); 2958 match(Set dst (LoadVector mem)); 2959 ins_cost(125); 2960 format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %} 2961 ins_encode %{ 2962 __ movdqu($dst$$XMMRegister, $mem$$Address); 2963 %} 2964 ins_pipe( pipe_slow ); 2965 %} 2966 2967 // Load vectors (16 bytes long) 2968 instruct MoveVecX2Leg(legVecX dst, vecX src) %{ 2969 match(Set dst src); 2970 format %{ "movdqu $dst,$src\t! load vector (16 bytes)" %} 2971 ins_encode %{ 2972 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 2973 int vector_len = 2; 2974 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len); 2975 } else { 2976 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 2977 } 2978 %} 2979 ins_pipe( fpu_reg_reg ); 2980 %} 2981 2982 // Load vectors (16 bytes long) 2983 instruct MoveLeg2VecX(vecX dst, legVecX src) %{ 2984 match(Set dst src); 2985 format %{ "movdqu $dst,$src\t! load vector (16 bytes)" %} 2986 ins_encode %{ 2987 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 2988 int vector_len = 2; 2989 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len); 2990 } else { 2991 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 2992 } 2993 %} 2994 ins_pipe( fpu_reg_reg ); 2995 %} 2996 2997 // Load vectors (32 bytes long) 2998 instruct loadV32(vecY dst, memory mem) %{ 2999 predicate(n->as_LoadVector()->memory_size() == 32); 3000 match(Set dst (LoadVector mem)); 3001 ins_cost(125); 3002 format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %} 3003 ins_encode %{ 3004 __ vmovdqu($dst$$XMMRegister, $mem$$Address); 3005 %} 3006 ins_pipe( pipe_slow ); 3007 %} 3008 3009 // Load vectors (32 bytes long) 3010 instruct MoveVecY2Leg(legVecY dst, vecY src) %{ 3011 match(Set dst src); 3012 format %{ "vmovdqu $dst,$src\t! load vector (32 bytes)" %} 3013 ins_encode %{ 3014 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 3015 int vector_len = 2; 3016 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len); 3017 } else { 3018 __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); 3019 } 3020 %} 3021 ins_pipe( fpu_reg_reg ); 3022 %} 3023 3024 // Load vectors (32 bytes long) 3025 instruct MoveLeg2VecY(vecY dst, legVecY src) %{ 3026 match(Set dst src); 3027 format %{ "vmovdqu $dst,$src\t! 
load vector (32 bytes)" %} 3028 ins_encode %{ 3029 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 3030 int vector_len = 2; 3031 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len); 3032 } else { 3033 __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); 3034 } 3035 %} 3036 ins_pipe( fpu_reg_reg ); 3037 %} 3038 3039 // Load vectors (64 bytes long) 3040 instruct loadV64_dword(vecZ dst, memory mem) %{ 3041 predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() <= 4); 3042 match(Set dst (LoadVector mem)); 3043 ins_cost(125); 3044 format %{ "vmovdqul $dst k0,$mem\t! load vector (64 bytes)" %} 3045 ins_encode %{ 3046 int vector_len = 2; 3047 __ evmovdqul($dst$$XMMRegister, $mem$$Address, vector_len); 3048 %} 3049 ins_pipe( pipe_slow ); 3050 %} 3051 3052 // Load vectors (64 bytes long) 3053 instruct loadV64_qword(vecZ dst, memory mem) %{ 3054 predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() > 4); 3055 match(Set dst (LoadVector mem)); 3056 ins_cost(125); 3057 format %{ "vmovdquq $dst k0,$mem\t! load vector (64 bytes)" %} 3058 ins_encode %{ 3059 int vector_len = 2; 3060 __ evmovdquq($dst$$XMMRegister, $mem$$Address, vector_len); 3061 %} 3062 ins_pipe( pipe_slow ); 3063 %} 3064 3065 instruct MoveVecZ2Leg(legVecZ dst, vecZ src) %{ 3066 match(Set dst src); 3067 format %{ "vmovdquq $dst k0,$src\t! Move vector (64 bytes)" %} 3068 ins_encode %{ 3069 int vector_len = 2; 3070 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len); 3071 %} 3072 ins_pipe( fpu_reg_reg ); 3073 %} 3074 3075 instruct MoveLeg2VecZ(vecZ dst, legVecZ src) %{ 3076 match(Set dst src); 3077 format %{ "vmovdquq $dst k0,$src\t! Move vector (64 bytes)" %} 3078 ins_encode %{ 3079 int vector_len = 2; 3080 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len); 3081 %} 3082 ins_pipe( fpu_reg_reg ); 3083 %} 3084 3085 // Store vectors 3086 instruct storeV4(memory mem, vecS src) %{ 3087 predicate(n->as_StoreVector()->memory_size() == 4); 3088 match(Set mem (StoreVector mem src)); 3089 ins_cost(145); 3090 format %{ "movd $mem,$src\t! store vector (4 bytes)" %} 3091 ins_encode %{ 3092 __ movdl($mem$$Address, $src$$XMMRegister); 3093 %} 3094 ins_pipe( pipe_slow ); 3095 %} 3096 3097 instruct storeV8(memory mem, vecD src) %{ 3098 predicate(n->as_StoreVector()->memory_size() == 8); 3099 match(Set mem (StoreVector mem src)); 3100 ins_cost(145); 3101 format %{ "movq $mem,$src\t! store vector (8 bytes)" %} 3102 ins_encode %{ 3103 __ movq($mem$$Address, $src$$XMMRegister); 3104 %} 3105 ins_pipe( pipe_slow ); 3106 %} 3107 3108 instruct storeV16(memory mem, vecX src) %{ 3109 predicate(n->as_StoreVector()->memory_size() == 16); 3110 match(Set mem (StoreVector mem src)); 3111 ins_cost(145); 3112 format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %} 3113 ins_encode %{ 3114 __ movdqu($mem$$Address, $src$$XMMRegister); 3115 %} 3116 ins_pipe( pipe_slow ); 3117 %} 3118 3119 instruct storeV32(memory mem, vecY src) %{ 3120 predicate(n->as_StoreVector()->memory_size() == 32); 3121 match(Set mem (StoreVector mem src)); 3122 ins_cost(145); 3123 format %{ "vmovdqu $mem,$src\t! 
store vector (32 bytes)" %} 3124 ins_encode %{ 3125 __ vmovdqu($mem$$Address, $src$$XMMRegister); 3126 %} 3127 ins_pipe( pipe_slow ); 3128 %} 3129 3130 instruct storeV64_dword(memory mem, vecZ src) %{ 3131 predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() <= 4); 3132 match(Set mem (StoreVector mem src)); 3133 ins_cost(145); 3134 format %{ "vmovdqul $mem k0,$src\t! store vector (64 bytes)" %} 3135 ins_encode %{ 3136 int vector_len = 2; 3137 __ evmovdqul($mem$$Address, $src$$XMMRegister, vector_len); 3138 %} 3139 ins_pipe( pipe_slow ); 3140 %} 3141 3142 instruct storeV64_qword(memory mem, vecZ src) %{ 3143 predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() > 4); 3144 match(Set mem (StoreVector mem src)); 3145 ins_cost(145); 3146 format %{ "vmovdquq $mem k0,$src\t! store vector (64 bytes)" %} 3147 ins_encode %{ 3148 int vector_len = 2; 3149 __ evmovdquq($mem$$Address, $src$$XMMRegister, vector_len); 3150 %} 3151 ins_pipe( pipe_slow ); 3152 %} 3153 3154 // ====================LEGACY REPLICATE======================================= 3155 3156 instruct Repl4B_mem(vecS dst, memory mem) %{ 3157 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3158 match(Set dst (ReplicateB (LoadB mem))); 3159 format %{ "punpcklbw $dst,$mem\n\t" 3160 "pshuflw $dst,$dst,0x00\t! replicate4B" %} 3161 ins_encode %{ 3162 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3163 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3164 %} 3165 ins_pipe( pipe_slow ); 3166 %} 3167 3168 instruct Repl8B_mem(vecD dst, memory mem) %{ 3169 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3170 match(Set dst (ReplicateB (LoadB mem))); 3171 format %{ "punpcklbw $dst,$mem\n\t" 3172 "pshuflw $dst,$dst,0x00\t! replicate8B" %} 3173 ins_encode %{ 3174 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3175 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3176 %} 3177 ins_pipe( pipe_slow ); 3178 %} 3179 3180 instruct Repl16B(vecX dst, rRegI src) %{ 3181 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3182 match(Set dst (ReplicateB src)); 3183 format %{ "movd $dst,$src\n\t" 3184 "punpcklbw $dst,$dst\n\t" 3185 "pshuflw $dst,$dst,0x00\n\t" 3186 "punpcklqdq $dst,$dst\t! replicate16B" %} 3187 ins_encode %{ 3188 __ movdl($dst$$XMMRegister, $src$$Register); 3189 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3190 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3191 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3192 %} 3193 ins_pipe( pipe_slow ); 3194 %} 3195 3196 instruct Repl16B_mem(vecX dst, memory mem) %{ 3197 predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3198 match(Set dst (ReplicateB (LoadB mem))); 3199 format %{ "punpcklbw $dst,$mem\n\t" 3200 "pshuflw $dst,$dst,0x00\n\t" 3201 "punpcklqdq $dst,$dst\t! 
replicate16B" %} 3202 ins_encode %{ 3203 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3204 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3205 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3206 %} 3207 ins_pipe( pipe_slow ); 3208 %} 3209 3210 instruct Repl32B(vecY dst, rRegI src) %{ 3211 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3212 match(Set dst (ReplicateB src)); 3213 format %{ "movd $dst,$src\n\t" 3214 "punpcklbw $dst,$dst\n\t" 3215 "pshuflw $dst,$dst,0x00\n\t" 3216 "punpcklqdq $dst,$dst\n\t" 3217 "vinserti128_high $dst,$dst\t! replicate32B" %} 3218 ins_encode %{ 3219 __ movdl($dst$$XMMRegister, $src$$Register); 3220 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3221 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3222 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3223 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3224 %} 3225 ins_pipe( pipe_slow ); 3226 %} 3227 3228 instruct Repl32B_mem(vecY dst, memory mem) %{ 3229 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3230 match(Set dst (ReplicateB (LoadB mem))); 3231 format %{ "punpcklbw $dst,$mem\n\t" 3232 "pshuflw $dst,$dst,0x00\n\t" 3233 "punpcklqdq $dst,$dst\n\t" 3234 "vinserti128_high $dst,$dst\t! replicate32B" %} 3235 ins_encode %{ 3236 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3237 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3238 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3239 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3240 %} 3241 ins_pipe( pipe_slow ); 3242 %} 3243 3244 instruct Repl64B(legVecZ dst, rRegI src) %{ 3245 predicate(n->as_Vector()->length() == 64 && !VM_Version::supports_avx512vlbw()); 3246 match(Set dst (ReplicateB src)); 3247 format %{ "movd $dst,$src\n\t" 3248 "punpcklbw $dst,$dst\n\t" 3249 "pshuflw $dst,$dst,0x00\n\t" 3250 "punpcklqdq $dst,$dst\n\t" 3251 "vinserti128_high $dst,$dst\t" 3252 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate64B" %} 3253 ins_encode %{ 3254 __ movdl($dst$$XMMRegister, $src$$Register); 3255 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3256 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3257 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3258 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3259 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3260 %} 3261 ins_pipe( pipe_slow ); 3262 %} 3263 3264 instruct Repl64B_mem(legVecZ dst, memory mem) %{ 3265 predicate(n->as_Vector()->length() == 64 && !VM_Version::supports_avx512vlbw()); 3266 match(Set dst (ReplicateB (LoadB mem))); 3267 format %{ "punpcklbw $dst,$mem\n\t" 3268 "pshuflw $dst,$dst,0x00\n\t" 3269 "punpcklqdq $dst,$dst\n\t" 3270 "vinserti128_high $dst,$dst\t" 3271 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate64B" %} 3272 ins_encode %{ 3273 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3274 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3275 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3276 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3277 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3278 %} 3279 ins_pipe( pipe_slow ); 3280 %} 3281 3282 instruct Repl16B_imm(vecX dst, immI con) %{ 3283 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3284 match(Set dst (ReplicateB con)); 3285 format %{ "movq $dst,[$constantaddress]\n\t" 3286 "punpcklqdq $dst,$dst\t! 
replicate16B($con)" %} 3287 ins_encode %{ 3288 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3289 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3290 %} 3291 ins_pipe( pipe_slow ); 3292 %} 3293 3294 instruct Repl32B_imm(vecY dst, immI con) %{ 3295 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3296 match(Set dst (ReplicateB con)); 3297 format %{ "movq $dst,[$constantaddress]\n\t" 3298 "punpcklqdq $dst,$dst\n\t" 3299 "vinserti128_high $dst,$dst\t! lreplicate32B($con)" %} 3300 ins_encode %{ 3301 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3302 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3303 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3304 %} 3305 ins_pipe( pipe_slow ); 3306 %} 3307 3308 instruct Repl64B_imm(legVecZ dst, immI con) %{ 3309 predicate(n->as_Vector()->length() == 64 && !VM_Version::supports_avx512vlbw()); 3310 match(Set dst (ReplicateB con)); 3311 format %{ "movq $dst,[$constantaddress]\n\t" 3312 "punpcklqdq $dst,$dst\n\t" 3313 "vinserti128_high $dst,$dst\t" 3314 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate64B($con)" %} 3315 ins_encode %{ 3316 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3317 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3318 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3319 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3320 %} 3321 ins_pipe( pipe_slow ); 3322 %} 3323 3324 instruct Repl4S(vecD dst, rRegI src) %{ 3325 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vlbw()); 3326 match(Set dst (ReplicateS src)); 3327 format %{ "movd $dst,$src\n\t" 3328 "pshuflw $dst,$dst,0x00\t! replicate4S" %} 3329 ins_encode %{ 3330 __ movdl($dst$$XMMRegister, $src$$Register); 3331 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3332 %} 3333 ins_pipe( pipe_slow ); 3334 %} 3335 3336 instruct Repl4S_mem(vecD dst, memory mem) %{ 3337 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3338 match(Set dst (ReplicateS (LoadS mem))); 3339 format %{ "pshuflw $dst,$mem,0x00\t! replicate4S" %} 3340 ins_encode %{ 3341 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3342 %} 3343 ins_pipe( pipe_slow ); 3344 %} 3345 3346 instruct Repl8S(vecX dst, rRegI src) %{ 3347 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw()); 3348 match(Set dst (ReplicateS src)); 3349 format %{ "movd $dst,$src\n\t" 3350 "pshuflw $dst,$dst,0x00\n\t" 3351 "punpcklqdq $dst,$dst\t! replicate8S" %} 3352 ins_encode %{ 3353 __ movdl($dst$$XMMRegister, $src$$Register); 3354 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3355 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3356 %} 3357 ins_pipe( pipe_slow ); 3358 %} 3359 3360 instruct Repl8S_mem(vecX dst, memory mem) %{ 3361 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3362 match(Set dst (ReplicateS (LoadS mem))); 3363 format %{ "pshuflw $dst,$mem,0x00\n\t" 3364 "punpcklqdq $dst,$dst\t! 
replicate8S" %} 3365 ins_encode %{ 3366 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3367 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3368 %} 3369 ins_pipe( pipe_slow ); 3370 %} 3371 3372 instruct Repl8S_imm(vecX dst, immI con) %{ 3373 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw()); 3374 match(Set dst (ReplicateS con)); 3375 format %{ "movq $dst,[$constantaddress]\n\t" 3376 "punpcklqdq $dst,$dst\t! replicate8S($con)" %} 3377 ins_encode %{ 3378 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3379 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3380 %} 3381 ins_pipe( pipe_slow ); 3382 %} 3383 3384 instruct Repl16S(vecY dst, rRegI src) %{ 3385 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3386 match(Set dst (ReplicateS src)); 3387 format %{ "movd $dst,$src\n\t" 3388 "pshuflw $dst,$dst,0x00\n\t" 3389 "punpcklqdq $dst,$dst\n\t" 3390 "vinserti128_high $dst,$dst\t! replicate16S" %} 3391 ins_encode %{ 3392 __ movdl($dst$$XMMRegister, $src$$Register); 3393 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3394 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3395 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3396 %} 3397 ins_pipe( pipe_slow ); 3398 %} 3399 3400 instruct Repl16S_mem(vecY dst, memory mem) %{ 3401 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3402 match(Set dst (ReplicateS (LoadS mem))); 3403 format %{ "pshuflw $dst,$mem,0x00\n\t" 3404 "punpcklqdq $dst,$dst\n\t" 3405 "vinserti128_high $dst,$dst\t! replicate16S" %} 3406 ins_encode %{ 3407 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3408 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3409 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3410 %} 3411 ins_pipe( pipe_slow ); 3412 %} 3413 3414 instruct Repl16S_imm(vecY dst, immI con) %{ 3415 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3416 match(Set dst (ReplicateS con)); 3417 format %{ "movq $dst,[$constantaddress]\n\t" 3418 "punpcklqdq $dst,$dst\n\t" 3419 "vinserti128_high $dst,$dst\t! replicate16S($con)" %} 3420 ins_encode %{ 3421 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3422 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3423 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3424 %} 3425 ins_pipe( pipe_slow ); 3426 %} 3427 3428 instruct Repl32S(legVecZ dst, rRegI src) %{ 3429 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3430 match(Set dst (ReplicateS src)); 3431 format %{ "movd $dst,$src\n\t" 3432 "pshuflw $dst,$dst,0x00\n\t" 3433 "punpcklqdq $dst,$dst\n\t" 3434 "vinserti128_high $dst,$dst\t" 3435 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate32S" %} 3436 ins_encode %{ 3437 __ movdl($dst$$XMMRegister, $src$$Register); 3438 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3439 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3440 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3441 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3442 %} 3443 ins_pipe( pipe_slow ); 3444 %} 3445 3446 instruct Repl32S_mem(legVecZ dst, memory mem) %{ 3447 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3448 match(Set dst (ReplicateS (LoadS mem))); 3449 format %{ "pshuflw $dst,$mem,0x00\n\t" 3450 "punpcklqdq $dst,$dst\n\t" 3451 "vinserti128_high $dst,$dst\t" 3452 "vinserti64x4 $dst,$dst,$dst,0x1\t! 
replicate32S" %} 3453 ins_encode %{ 3454 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3455 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3456 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3457 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3458 %} 3459 ins_pipe( pipe_slow ); 3460 %} 3461 3462 instruct Repl32S_imm(legVecZ dst, immI con) %{ 3463 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3464 match(Set dst (ReplicateS con)); 3465 format %{ "movq $dst,[$constantaddress]\n\t" 3466 "punpcklqdq $dst,$dst\n\t" 3467 "vinserti128_high $dst,$dst\t" 3468 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate32S($con)" %} 3469 ins_encode %{ 3470 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3471 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3472 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3473 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3474 %} 3475 ins_pipe( pipe_slow ); 3476 %} 3477 3478 instruct Repl4I(vecX dst, rRegI src) %{ 3479 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3480 match(Set dst (ReplicateI src)); 3481 format %{ "movd $dst,$src\n\t" 3482 "pshufd $dst,$dst,0x00\t! replicate4I" %} 3483 ins_encode %{ 3484 __ movdl($dst$$XMMRegister, $src$$Register); 3485 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3486 %} 3487 ins_pipe( pipe_slow ); 3488 %} 3489 3490 instruct Repl4I_mem(vecX dst, memory mem) %{ 3491 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3492 match(Set dst (ReplicateI (LoadI mem))); 3493 format %{ "pshufd $dst,$mem,0x00\t! replicate4I" %} 3494 ins_encode %{ 3495 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3496 %} 3497 ins_pipe( pipe_slow ); 3498 %} 3499 3500 instruct Repl8I(vecY dst, rRegI src) %{ 3501 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3502 match(Set dst (ReplicateI src)); 3503 format %{ "movd $dst,$src\n\t" 3504 "pshufd $dst,$dst,0x00\n\t" 3505 "vinserti128_high $dst,$dst\t! replicate8I" %} 3506 ins_encode %{ 3507 __ movdl($dst$$XMMRegister, $src$$Register); 3508 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3509 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3510 %} 3511 ins_pipe( pipe_slow ); 3512 %} 3513 3514 instruct Repl8I_mem(vecY dst, memory mem) %{ 3515 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3516 match(Set dst (ReplicateI (LoadI mem))); 3517 format %{ "pshufd $dst,$mem,0x00\n\t" 3518 "vinserti128_high $dst,$dst\t! replicate8I" %} 3519 ins_encode %{ 3520 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3521 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3522 %} 3523 ins_pipe( pipe_slow ); 3524 %} 3525 3526 instruct Repl16I(legVecZ dst, rRegI src) %{ 3527 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl()); 3528 match(Set dst (ReplicateI src)); 3529 format %{ "movd $dst,$src\n\t" 3530 "pshufd $dst,$dst,0x00\n\t" 3531 "vinserti128_high $dst,$dst\t" 3532 "vinserti64x4 $dst,$dst,$dst,0x1\t! 
replicate16I" %} 3533 ins_encode %{ 3534 __ movdl($dst$$XMMRegister, $src$$Register); 3535 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3536 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3537 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3538 %} 3539 ins_pipe( pipe_slow ); 3540 %} 3541 3542 instruct Repl16I_mem(legVecZ dst, memory mem) %{ 3543 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl()); 3544 match(Set dst (ReplicateI (LoadI mem))); 3545 format %{ "pshufd $dst,$mem,0x00\n\t" 3546 "vinserti128_high $dst,$dst\t" 3547 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16I" %} 3548 ins_encode %{ 3549 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3550 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3551 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3552 %} 3553 ins_pipe( pipe_slow ); 3554 %} 3555 3556 instruct Repl4I_imm(vecX dst, immI con) %{ 3557 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3558 match(Set dst (ReplicateI con)); 3559 format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t" 3560 "punpcklqdq $dst,$dst" %} 3561 ins_encode %{ 3562 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3563 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3564 %} 3565 ins_pipe( pipe_slow ); 3566 %} 3567 3568 instruct Repl8I_imm(vecY dst, immI con) %{ 3569 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3570 match(Set dst (ReplicateI con)); 3571 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 3572 "punpcklqdq $dst,$dst\n\t" 3573 "vinserti128_high $dst,$dst" %} 3574 ins_encode %{ 3575 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3576 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3577 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3578 %} 3579 ins_pipe( pipe_slow ); 3580 %} 3581 3582 instruct Repl16I_imm(legVecZ dst, immI con) %{ 3583 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl()); 3584 match(Set dst (ReplicateI con)); 3585 format %{ "movq $dst,[$constantaddress]\t" 3586 "punpcklqdq $dst,$dst\n\t" 3587 "vinserti128_high $dst,$dst" 3588 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16I($con)" %} 3589 ins_encode %{ 3590 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3591 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3592 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3593 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3594 %} 3595 ins_pipe( pipe_slow ); 3596 %} 3597 3598 // Long could be loaded into xmm register directly from memory. 3599 instruct Repl2L_mem(vecX dst, memory mem) %{ 3600 predicate(n->as_Vector()->length() == 2 && !VM_Version::supports_avx512vlbw()); 3601 match(Set dst (ReplicateL (LoadL mem))); 3602 format %{ "movq $dst,$mem\n\t" 3603 "punpcklqdq $dst,$dst\t! 
replicate2L" %} 3604 ins_encode %{ 3605 __ movq($dst$$XMMRegister, $mem$$Address); 3606 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3607 %} 3608 ins_pipe( pipe_slow ); 3609 %} 3610 3611 // Replicate long (8 byte) scalar to be vector 3612 #ifdef _LP64 3613 instruct Repl4L(vecY dst, rRegL src) %{ 3614 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3615 match(Set dst (ReplicateL src)); 3616 format %{ "movdq $dst,$src\n\t" 3617 "punpcklqdq $dst,$dst\n\t" 3618 "vinserti128_high $dst,$dst\t! replicate4L" %} 3619 ins_encode %{ 3620 __ movdq($dst$$XMMRegister, $src$$Register); 3621 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3622 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3623 %} 3624 ins_pipe( pipe_slow ); 3625 %} 3626 3627 instruct Repl8L(legVecZ dst, rRegL src) %{ 3628 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3629 match(Set dst (ReplicateL src)); 3630 format %{ "movdq $dst,$src\n\t" 3631 "punpcklqdq $dst,$dst\n\t" 3632 "vinserti128_high $dst,$dst\t" 3633 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L" %} 3634 ins_encode %{ 3635 __ movdq($dst$$XMMRegister, $src$$Register); 3636 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3637 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3638 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3639 %} 3640 ins_pipe( pipe_slow ); 3641 %} 3642 #else // _LP64 3643 instruct Repl4L(vecY dst, eRegL src, vecY tmp) %{ 3644 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3645 match(Set dst (ReplicateL src)); 3646 effect(TEMP dst, USE src, TEMP tmp); 3647 format %{ "movdl $dst,$src.lo\n\t" 3648 "movdl $tmp,$src.hi\n\t" 3649 "punpckldq $dst,$tmp\n\t" 3650 "punpcklqdq $dst,$dst\n\t" 3651 "vinserti128_high $dst,$dst\t! replicate4L" %} 3652 ins_encode %{ 3653 __ movdl($dst$$XMMRegister, $src$$Register); 3654 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3655 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3656 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3657 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3658 %} 3659 ins_pipe( pipe_slow ); 3660 %} 3661 3662 instruct Repl8L(legVecZ dst, eRegL src, legVecZ tmp) %{ 3663 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3664 match(Set dst (ReplicateL src)); 3665 effect(TEMP dst, USE src, TEMP tmp); 3666 format %{ "movdl $dst,$src.lo\n\t" 3667 "movdl $tmp,$src.hi\n\t" 3668 "punpckldq $dst,$tmp\n\t" 3669 "punpcklqdq $dst,$dst\n\t" 3670 "vinserti128_high $dst,$dst\t" 3671 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L" %} 3672 ins_encode %{ 3673 __ movdl($dst$$XMMRegister, $src$$Register); 3674 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3675 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3676 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3677 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3678 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3679 %} 3680 ins_pipe( pipe_slow ); 3681 %} 3682 #endif // _LP64 3683 3684 instruct Repl4L_imm(vecY dst, immL con) %{ 3685 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3686 match(Set dst (ReplicateL con)); 3687 format %{ "movq $dst,[$constantaddress]\n\t" 3688 "punpcklqdq $dst,$dst\n\t" 3689 "vinserti128_high $dst,$dst\t! 
replicate4L($con)" %} 3690 ins_encode %{ 3691 __ movq($dst$$XMMRegister, $constantaddress($con)); 3692 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3693 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3694 %} 3695 ins_pipe( pipe_slow ); 3696 %} 3697 3698 instruct Repl8L_imm(legVecZ dst, immL con) %{ 3699 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3700 match(Set dst (ReplicateL con)); 3701 format %{ "movq $dst,[$constantaddress]\n\t" 3702 "punpcklqdq $dst,$dst\n\t" 3703 "vinserti128_high $dst,$dst\t" 3704 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L($con)" %} 3705 ins_encode %{ 3706 __ movq($dst$$XMMRegister, $constantaddress($con)); 3707 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3708 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3709 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3710 %} 3711 ins_pipe( pipe_slow ); 3712 %} 3713 3714 instruct Repl4L_mem(vecY dst, memory mem) %{ 3715 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3716 match(Set dst (ReplicateL (LoadL mem))); 3717 format %{ "movq $dst,$mem\n\t" 3718 "punpcklqdq $dst,$dst\n\t" 3719 "vinserti128_high $dst,$dst\t! replicate4L" %} 3720 ins_encode %{ 3721 __ movq($dst$$XMMRegister, $mem$$Address); 3722 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3723 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3724 %} 3725 ins_pipe( pipe_slow ); 3726 %} 3727 3728 instruct Repl8L_mem(legVecZ dst, memory mem) %{ 3729 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3730 match(Set dst (ReplicateL (LoadL mem))); 3731 format %{ "movq $dst,$mem\n\t" 3732 "punpcklqdq $dst,$dst\n\t" 3733 "vinserti128_high $dst,$dst\t" 3734 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L" %} 3735 ins_encode %{ 3736 __ movq($dst$$XMMRegister, $mem$$Address); 3737 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3738 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3739 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3740 %} 3741 ins_pipe( pipe_slow ); 3742 %} 3743 3744 instruct Repl2F_mem(vecD dst, memory mem) %{ 3745 predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3746 match(Set dst (ReplicateF (LoadF mem))); 3747 format %{ "pshufd $dst,$mem,0x00\t! replicate2F" %} 3748 ins_encode %{ 3749 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3750 %} 3751 ins_pipe( pipe_slow ); 3752 %} 3753 3754 instruct Repl4F_mem(vecX dst, memory mem) %{ 3755 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3756 match(Set dst (ReplicateF (LoadF mem))); 3757 format %{ "pshufd $dst,$mem,0x00\t! replicate4F" %} 3758 ins_encode %{ 3759 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3760 %} 3761 ins_pipe( pipe_slow ); 3762 %} 3763 3764 instruct Repl8F(vecY dst, vlRegF src) %{ 3765 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3766 match(Set dst (ReplicateF src)); 3767 format %{ "pshufd $dst,$src,0x00\n\t" 3768 "vinsertf128_high $dst,$dst\t! 
replicate8F" %} 3769 ins_encode %{ 3770 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3771 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3772 %} 3773 ins_pipe( pipe_slow ); 3774 %} 3775 3776 instruct Repl8F_mem(vecY dst, memory mem) %{ 3777 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3778 match(Set dst (ReplicateF (LoadF mem))); 3779 format %{ "pshufd $dst,$mem,0x00\n\t" 3780 "vinsertf128_high $dst,$dst\t! replicate8F" %} 3781 ins_encode %{ 3782 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3783 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3784 %} 3785 ins_pipe( pipe_slow ); 3786 %} 3787 3788 instruct Repl16F(legVecZ dst, vlRegF src) %{ 3789 predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3790 match(Set dst (ReplicateF src)); 3791 format %{ "pshufd $dst,$src,0x00\n\t" 3792 "vinsertf128_high $dst,$dst\t" 3793 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16F" %} 3794 ins_encode %{ 3795 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3796 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3797 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3798 %} 3799 ins_pipe( pipe_slow ); 3800 %} 3801 3802 instruct Repl16F_mem(legVecZ dst, memory mem) %{ 3803 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl()); 3804 match(Set dst (ReplicateF (LoadF mem))); 3805 format %{ "pshufd $dst,$mem,0x00\n\t" 3806 "vinsertf128_high $dst,$dst\t" 3807 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16F" %} 3808 ins_encode %{ 3809 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3810 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3811 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3812 %} 3813 ins_pipe( pipe_slow ); 3814 %} 3815 3816 instruct Repl2F_zero(vecD dst, immF0 zero) %{ 3817 predicate(n->as_Vector()->length() == 2 && UseAVX < 3); 3818 match(Set dst (ReplicateF zero)); 3819 format %{ "xorps $dst,$dst\t! replicate2F zero" %} 3820 ins_encode %{ 3821 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 3822 %} 3823 ins_pipe( fpu_reg_reg ); 3824 %} 3825 3826 instruct Repl4F_zero(vecX dst, immF0 zero) %{ 3827 predicate(n->as_Vector()->length() == 4 && UseAVX < 3); 3828 match(Set dst (ReplicateF zero)); 3829 format %{ "xorps $dst,$dst\t! replicate4F zero" %} 3830 ins_encode %{ 3831 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 3832 %} 3833 ins_pipe( fpu_reg_reg ); 3834 %} 3835 3836 instruct Repl8F_zero(vecY dst, immF0 zero) %{ 3837 predicate(n->as_Vector()->length() == 8 && UseAVX < 3); 3838 match(Set dst (ReplicateF zero)); 3839 format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %} 3840 ins_encode %{ 3841 int vector_len = 1; 3842 __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3843 %} 3844 ins_pipe( fpu_reg_reg ); 3845 %} 3846 3847 instruct Repl2D_mem(vecX dst, memory mem) %{ 3848 predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3849 match(Set dst (ReplicateD (LoadD mem))); 3850 format %{ "pshufd $dst,$mem,0x44\t! replicate2D" %} 3851 ins_encode %{ 3852 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3853 %} 3854 ins_pipe( pipe_slow ); 3855 %} 3856 3857 instruct Repl4D(vecY dst, vlRegD src) %{ 3858 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3859 match(Set dst (ReplicateD src)); 3860 format %{ "pshufd $dst,$src,0x44\n\t" 3861 "vinsertf128_high $dst,$dst\t! 
replicate4D" %} 3862 ins_encode %{ 3863 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3864 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3865 %} 3866 ins_pipe( pipe_slow ); 3867 %} 3868 3869 instruct Repl4D_mem(vecY dst, memory mem) %{ 3870 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3871 match(Set dst (ReplicateD (LoadD mem))); 3872 format %{ "pshufd $dst,$mem,0x44\n\t" 3873 "vinsertf128_high $dst,$dst\t! replicate4D" %} 3874 ins_encode %{ 3875 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3876 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3877 %} 3878 ins_pipe( pipe_slow ); 3879 %} 3880 3881 instruct Repl8D(legVecZ dst, vlRegD src) %{ 3882 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3883 match(Set dst (ReplicateD src)); 3884 format %{ "pshufd $dst,$src,0x44\n\t" 3885 "vinsertf128_high $dst,$dst\t" 3886 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8D" %} 3887 ins_encode %{ 3888 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3889 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3890 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3891 %} 3892 ins_pipe( pipe_slow ); 3893 %} 3894 3895 instruct Repl8D_mem(legVecZ dst, memory mem) %{ 3896 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3897 match(Set dst (ReplicateD (LoadD mem))); 3898 format %{ "pshufd $dst,$mem,0x44\n\t" 3899 "vinsertf128_high $dst,$dst\t" 3900 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8D" %} 3901 ins_encode %{ 3902 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3903 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3904 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3905 %} 3906 ins_pipe( pipe_slow ); 3907 %} 3908 3909 // Replicate double (8 byte) scalar zero to be vector 3910 instruct Repl2D_zero(vecX dst, immD0 zero) %{ 3911 predicate(n->as_Vector()->length() == 2 && UseAVX < 3); 3912 match(Set dst (ReplicateD zero)); 3913 format %{ "xorpd $dst,$dst\t! replicate2D zero" %} 3914 ins_encode %{ 3915 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister); 3916 %} 3917 ins_pipe( fpu_reg_reg ); 3918 %} 3919 3920 instruct Repl4D_zero(vecY dst, immD0 zero) %{ 3921 predicate(n->as_Vector()->length() == 4 && UseAVX < 3); 3922 match(Set dst (ReplicateD zero)); 3923 format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %} 3924 ins_encode %{ 3925 int vector_len = 1; 3926 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3927 %} 3928 ins_pipe( fpu_reg_reg ); 3929 %} 3930 3931 // ====================GENERIC REPLICATE========================================== 3932 3933 // Replicate byte scalar to be vector 3934 instruct Repl4B(vecS dst, rRegI src) %{ 3935 predicate(n->as_Vector()->length() == 4); 3936 match(Set dst (ReplicateB src)); 3937 format %{ "movd $dst,$src\n\t" 3938 "punpcklbw $dst,$dst\n\t" 3939 "pshuflw $dst,$dst,0x00\t! replicate4B" %} 3940 ins_encode %{ 3941 __ movdl($dst$$XMMRegister, $src$$Register); 3942 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3943 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3944 %} 3945 ins_pipe( pipe_slow ); 3946 %} 3947 3948 instruct Repl8B(vecD dst, rRegI src) %{ 3949 predicate(n->as_Vector()->length() == 8); 3950 match(Set dst (ReplicateB src)); 3951 format %{ "movd $dst,$src\n\t" 3952 "punpcklbw $dst,$dst\n\t" 3953 "pshuflw $dst,$dst,0x00\t! 
replicate8B" %} 3954 ins_encode %{ 3955 __ movdl($dst$$XMMRegister, $src$$Register); 3956 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3957 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3958 %} 3959 ins_pipe( pipe_slow ); 3960 %} 3961 3962 // Replicate byte scalar immediate to be vector by loading from const table. 3963 instruct Repl4B_imm(vecS dst, immI con) %{ 3964 predicate(n->as_Vector()->length() == 4); 3965 match(Set dst (ReplicateB con)); 3966 format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %} 3967 ins_encode %{ 3968 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1))); 3969 %} 3970 ins_pipe( pipe_slow ); 3971 %} 3972 3973 instruct Repl8B_imm(vecD dst, immI con) %{ 3974 predicate(n->as_Vector()->length() == 8); 3975 match(Set dst (ReplicateB con)); 3976 format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %} 3977 ins_encode %{ 3978 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3979 %} 3980 ins_pipe( pipe_slow ); 3981 %} 3982 3983 // Replicate byte scalar zero to be vector 3984 instruct Repl4B_zero(vecS dst, immI0 zero) %{ 3985 predicate(n->as_Vector()->length() == 4); 3986 match(Set dst (ReplicateB zero)); 3987 format %{ "pxor $dst,$dst\t! replicate4B zero" %} 3988 ins_encode %{ 3989 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3990 %} 3991 ins_pipe( fpu_reg_reg ); 3992 %} 3993 3994 instruct Repl8B_zero(vecD dst, immI0 zero) %{ 3995 predicate(n->as_Vector()->length() == 8); 3996 match(Set dst (ReplicateB zero)); 3997 format %{ "pxor $dst,$dst\t! replicate8B zero" %} 3998 ins_encode %{ 3999 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4000 %} 4001 ins_pipe( fpu_reg_reg ); 4002 %} 4003 4004 instruct Repl16B_zero(vecX dst, immI0 zero) %{ 4005 predicate(n->as_Vector()->length() == 16); 4006 match(Set dst (ReplicateB zero)); 4007 format %{ "pxor $dst,$dst\t! replicate16B zero" %} 4008 ins_encode %{ 4009 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4010 %} 4011 ins_pipe( fpu_reg_reg ); 4012 %} 4013 4014 instruct Repl32B_zero(vecY dst, immI0 zero) %{ 4015 predicate(n->as_Vector()->length() == 32); 4016 match(Set dst (ReplicateB zero)); 4017 format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %} 4018 ins_encode %{ 4019 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 4020 int vector_len = 1; 4021 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4022 %} 4023 ins_pipe( fpu_reg_reg ); 4024 %} 4025 4026 // Replicate char/short (2 byte) scalar to be vector 4027 instruct Repl2S(vecS dst, rRegI src) %{ 4028 predicate(n->as_Vector()->length() == 2); 4029 match(Set dst (ReplicateS src)); 4030 format %{ "movd $dst,$src\n\t" 4031 "pshuflw $dst,$dst,0x00\t! replicate2S" %} 4032 ins_encode %{ 4033 __ movdl($dst$$XMMRegister, $src$$Register); 4034 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4035 %} 4036 ins_pipe( fpu_reg_reg ); 4037 %} 4038 4039 // Replicate char/short (2 byte) scalar immediate to be vector by loading from const table. 4040 instruct Repl2S_imm(vecS dst, immI con) %{ 4041 predicate(n->as_Vector()->length() == 2); 4042 match(Set dst (ReplicateS con)); 4043 format %{ "movdl $dst,[$constantaddress]\t! 
replicate2S($con)" %} 4044 ins_encode %{ 4045 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2))); 4046 %} 4047 ins_pipe( fpu_reg_reg ); 4048 %} 4049 4050 instruct Repl4S_imm(vecD dst, immI con) %{ 4051 predicate(n->as_Vector()->length() == 4); 4052 match(Set dst (ReplicateS con)); 4053 format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %} 4054 ins_encode %{ 4055 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4056 %} 4057 ins_pipe( fpu_reg_reg ); 4058 %} 4059 4060 // Replicate char/short (2 byte) scalar zero to be vector 4061 instruct Repl2S_zero(vecS dst, immI0 zero) %{ 4062 predicate(n->as_Vector()->length() == 2); 4063 match(Set dst (ReplicateS zero)); 4064 format %{ "pxor $dst,$dst\t! replicate2S zero" %} 4065 ins_encode %{ 4066 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4067 %} 4068 ins_pipe( fpu_reg_reg ); 4069 %} 4070 4071 instruct Repl4S_zero(vecD dst, immI0 zero) %{ 4072 predicate(n->as_Vector()->length() == 4); 4073 match(Set dst (ReplicateS zero)); 4074 format %{ "pxor $dst,$dst\t! replicate4S zero" %} 4075 ins_encode %{ 4076 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4077 %} 4078 ins_pipe( fpu_reg_reg ); 4079 %} 4080 4081 instruct Repl8S_zero(vecX dst, immI0 zero) %{ 4082 predicate(n->as_Vector()->length() == 8); 4083 match(Set dst (ReplicateS zero)); 4084 format %{ "pxor $dst,$dst\t! replicate8S zero" %} 4085 ins_encode %{ 4086 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4087 %} 4088 ins_pipe( fpu_reg_reg ); 4089 %} 4090 4091 instruct Repl16S_zero(vecY dst, immI0 zero) %{ 4092 predicate(n->as_Vector()->length() == 16); 4093 match(Set dst (ReplicateS zero)); 4094 format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %} 4095 ins_encode %{ 4096 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 4097 int vector_len = 1; 4098 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4099 %} 4100 ins_pipe( fpu_reg_reg ); 4101 %} 4102 4103 // Replicate integer (4 byte) scalar to be vector 4104 instruct Repl2I(vecD dst, rRegI src) %{ 4105 predicate(n->as_Vector()->length() == 2); 4106 match(Set dst (ReplicateI src)); 4107 format %{ "movd $dst,$src\n\t" 4108 "pshufd $dst,$dst,0x00\t! replicate2I" %} 4109 ins_encode %{ 4110 __ movdl($dst$$XMMRegister, $src$$Register); 4111 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4112 %} 4113 ins_pipe( fpu_reg_reg ); 4114 %} 4115 4116 // Integer could be loaded into xmm register directly from memory. 4117 instruct Repl2I_mem(vecD dst, memory mem) %{ 4118 predicate(n->as_Vector()->length() == 2); 4119 match(Set dst (ReplicateI (LoadI mem))); 4120 format %{ "movd $dst,$mem\n\t" 4121 "pshufd $dst,$dst,0x00\t! replicate2I" %} 4122 ins_encode %{ 4123 __ movdl($dst$$XMMRegister, $mem$$Address); 4124 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4125 %} 4126 ins_pipe( fpu_reg_reg ); 4127 %} 4128 4129 // Replicate integer (4 byte) scalar immediate to be vector by loading from const table. 4130 instruct Repl2I_imm(vecD dst, immI con) %{ 4131 predicate(n->as_Vector()->length() == 2); 4132 match(Set dst (ReplicateI con)); 4133 format %{ "movq $dst,[$constantaddress]\t! 
replicate2I($con)" %} 4134 ins_encode %{ 4135 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4136 %} 4137 ins_pipe( fpu_reg_reg ); 4138 %} 4139 4140 // Replicate integer (4 byte) scalar zero to be vector 4141 instruct Repl2I_zero(vecD dst, immI0 zero) %{ 4142 predicate(n->as_Vector()->length() == 2); 4143 match(Set dst (ReplicateI zero)); 4144 format %{ "pxor $dst,$dst\t! replicate2I" %} 4145 ins_encode %{ 4146 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4147 %} 4148 ins_pipe( fpu_reg_reg ); 4149 %} 4150 4151 instruct Repl4I_zero(vecX dst, immI0 zero) %{ 4152 predicate(n->as_Vector()->length() == 4); 4153 match(Set dst (ReplicateI zero)); 4154 format %{ "pxor $dst,$dst\t! replicate4I zero)" %} 4155 ins_encode %{ 4156 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4157 %} 4158 ins_pipe( fpu_reg_reg ); 4159 %} 4160 4161 instruct Repl8I_zero(vecY dst, immI0 zero) %{ 4162 predicate(n->as_Vector()->length() == 8); 4163 match(Set dst (ReplicateI zero)); 4164 format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %} 4165 ins_encode %{ 4166 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 4167 int vector_len = 1; 4168 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4169 %} 4170 ins_pipe( fpu_reg_reg ); 4171 %} 4172 4173 // Replicate long (8 byte) scalar to be vector 4174 #ifdef _LP64 4175 instruct Repl2L(vecX dst, rRegL src) %{ 4176 predicate(n->as_Vector()->length() == 2); 4177 match(Set dst (ReplicateL src)); 4178 format %{ "movdq $dst,$src\n\t" 4179 "punpcklqdq $dst,$dst\t! replicate2L" %} 4180 ins_encode %{ 4181 __ movdq($dst$$XMMRegister, $src$$Register); 4182 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4183 %} 4184 ins_pipe( pipe_slow ); 4185 %} 4186 #else // _LP64 4187 instruct Repl2L(vecX dst, eRegL src, vecX tmp) %{ 4188 predicate(n->as_Vector()->length() == 2); 4189 match(Set dst (ReplicateL src)); 4190 effect(TEMP dst, USE src, TEMP tmp); 4191 format %{ "movdl $dst,$src.lo\n\t" 4192 "movdl $tmp,$src.hi\n\t" 4193 "punpckldq $dst,$tmp\n\t" 4194 "punpcklqdq $dst,$dst\t! replicate2L"%} 4195 ins_encode %{ 4196 __ movdl($dst$$XMMRegister, $src$$Register); 4197 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4198 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4199 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4200 %} 4201 ins_pipe( pipe_slow ); 4202 %} 4203 #endif // _LP64 4204 4205 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. 4206 instruct Repl2L_imm(vecX dst, immL con) %{ 4207 predicate(n->as_Vector()->length() == 2); 4208 match(Set dst (ReplicateL con)); 4209 format %{ "movq $dst,[$constantaddress]\n\t" 4210 "punpcklqdq $dst,$dst\t! replicate2L($con)" %} 4211 ins_encode %{ 4212 __ movq($dst$$XMMRegister, $constantaddress($con)); 4213 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4214 %} 4215 ins_pipe( pipe_slow ); 4216 %} 4217 4218 // Replicate long (8 byte) scalar zero to be vector 4219 instruct Repl2L_zero(vecX dst, immL0 zero) %{ 4220 predicate(n->as_Vector()->length() == 2); 4221 match(Set dst (ReplicateL zero)); 4222 format %{ "pxor $dst,$dst\t! replicate2L zero" %} 4223 ins_encode %{ 4224 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4225 %} 4226 ins_pipe( fpu_reg_reg ); 4227 %} 4228 4229 instruct Repl4L_zero(vecY dst, immL0 zero) %{ 4230 predicate(n->as_Vector()->length() == 4); 4231 match(Set dst (ReplicateL zero)); 4232 format %{ "vpxor $dst,$dst,$dst\t! 
replicate4L zero" %} 4233 ins_encode %{ 4234 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 4235 int vector_len = 1; 4236 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4237 %} 4238 ins_pipe( fpu_reg_reg ); 4239 %} 4240 4241 // Replicate float (4 byte) scalar to be vector 4242 instruct Repl2F(vecD dst, vlRegF src) %{ 4243 predicate(n->as_Vector()->length() == 2); 4244 match(Set dst (ReplicateF src)); 4245 format %{ "pshufd $dst,$dst,0x00\t! replicate2F" %} 4246 ins_encode %{ 4247 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 4248 %} 4249 ins_pipe( fpu_reg_reg ); 4250 %} 4251 4252 instruct Repl4F(vecX dst, vlRegF src) %{ 4253 predicate(n->as_Vector()->length() == 4); 4254 match(Set dst (ReplicateF src)); 4255 format %{ "pshufd $dst,$dst,0x00\t! replicate4F" %} 4256 ins_encode %{ 4257 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 4258 %} 4259 ins_pipe( pipe_slow ); 4260 %} 4261 4262 // Replicate double (8 bytes) scalar to be vector 4263 instruct Repl2D(vecX dst, vlRegD src) %{ 4264 predicate(n->as_Vector()->length() == 2); 4265 match(Set dst (ReplicateD src)); 4266 format %{ "pshufd $dst,$src,0x44\t! replicate2D" %} 4267 ins_encode %{ 4268 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 4269 %} 4270 ins_pipe( pipe_slow ); 4271 %} 4272 4273 // ====================EVEX REPLICATE============================================= 4274 4275 instruct Repl4B_mem_evex(vecS dst, memory mem) %{ 4276 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4277 match(Set dst (ReplicateB (LoadB mem))); 4278 format %{ "vpbroadcastb $dst,$mem\t! replicate4B" %} 4279 ins_encode %{ 4280 int vector_len = 0; 4281 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4282 %} 4283 ins_pipe( pipe_slow ); 4284 %} 4285 4286 instruct Repl8B_mem_evex(vecD dst, memory mem) %{ 4287 predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4288 match(Set dst (ReplicateB (LoadB mem))); 4289 format %{ "vpbroadcastb $dst,$mem\t! replicate8B" %} 4290 ins_encode %{ 4291 int vector_len = 0; 4292 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4293 %} 4294 ins_pipe( pipe_slow ); 4295 %} 4296 4297 instruct Repl16B_evex(vecX dst, rRegI src) %{ 4298 predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4299 match(Set dst (ReplicateB src)); 4300 format %{ "evpbroadcastb $dst,$src\t! replicate16B" %} 4301 ins_encode %{ 4302 int vector_len = 0; 4303 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 4304 %} 4305 ins_pipe( pipe_slow ); 4306 %} 4307 4308 instruct Repl16B_mem_evex(vecX dst, memory mem) %{ 4309 predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4310 match(Set dst (ReplicateB (LoadB mem))); 4311 format %{ "vpbroadcastb $dst,$mem\t! replicate16B" %} 4312 ins_encode %{ 4313 int vector_len = 0; 4314 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4315 %} 4316 ins_pipe( pipe_slow ); 4317 %} 4318 4319 instruct Repl32B_evex(vecY dst, rRegI src) %{ 4320 predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4321 match(Set dst (ReplicateB src)); 4322 format %{ "evpbroadcastb $dst,$src\t! 
instruct Repl4B_mem_evex(vecS dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B_mem_evex(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_evex(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB src));
  format %{ "evpbroadcastb $dst,$src\t! replicate16B" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_evex(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB src));
  format %{ "evpbroadcastb $dst,$src\t! replicate32B" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_evex(vecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 64 && UseAVX > 2 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateB src));
  format %{ "evpbroadcastb $dst,$src\t! upper replicate64B" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 64 && UseAVX > 2 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_imm_evex(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastb $dst,$dst\t! replicate16B" %}
  ins_encode %{
    int vector_len = 0;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_imm_evex(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastb $dst,$dst\t! replicate32B" %}
  ins_encode %{
    int vector_len = 1;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_imm_evex(vecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 64 && UseAVX > 2 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastb $dst,$dst\t! upper replicate64B" %}
  ins_encode %{
    int vector_len = 2;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_zero_evex(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 64 && UseAVX > 2);
  match(Set dst (ReplicateB zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate64B zero" %}
  ins_encode %{
    // 512-bit vpxor is an EVEX (AVX512F) instruction, available here since UseAVX > 2.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}
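// The ev-prefixed MacroAssembler calls (evpbroadcastb/w/d/q) broadcast directly
// from a general-purpose register, a form EVEX introduced; the vpbroadcast*
// calls take an XMM register or memory source as in AVX2.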
instruct Repl4S_evex(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "evpbroadcastw $dst,$src\t! replicate4S" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4S_mem_evex(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw $dst,$mem\t! replicate4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_evex(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "evpbroadcastw $dst,$src\t! replicate8S" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw $dst,$mem\t! replicate8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_evex(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "evpbroadcastw $dst,$src\t! replicate16S" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw $dst,$mem\t! replicate16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_evex(vecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateS src));
  format %{ "evpbroadcastw $dst,$src\t! replicate32S" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw $dst,$mem\t! replicate32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
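// The *_imm_evex patterns below reuse replicate8_imm (defined earlier in this
// file), which builds a 64-bit constant-table entry by repeating the immediate
// at its element width (1, 2 or 4 bytes) until 8 bytes are filled; movq loads
// that entry and vpbroadcast* spreads it across the vector.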
instruct Repl8S_imm_evex(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastw $dst,$dst\t! replicate8S" %}
  ins_encode %{
    int vector_len = 0;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_imm_evex(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastw $dst,$dst\t! replicate16S" %}
  ins_encode %{
    int vector_len = 1;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_imm_evex(vecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastw $dst,$dst\t! replicate32S" %}
  ins_encode %{
    int vector_len = 2;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_zero_evex(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 2);
  match(Set dst (ReplicateS zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate32S zero" %}
  ins_encode %{
    // 512-bit vpxor is an EVEX (AVX512F) instruction, available here since UseAVX > 2.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_evex(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI src));
  format %{ "evpbroadcastd $dst,$src\t! replicate4I" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "vpbroadcastd $dst,$mem\t! replicate4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_evex(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI src));
  format %{ "evpbroadcastd $dst,$src\t! replicate8I" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct Repl8I_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "vpbroadcastd $dst,$mem\t! replicate8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_evex(vecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateI src));
  format %{ "evpbroadcastd $dst,$src\t! replicate16I" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "vpbroadcastd $dst,$mem\t! replicate16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I_imm_evex(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t"
            "vpbroadcastd $dst,$dst\t! replicate4I" %}
  ins_encode %{
    int vector_len = 0;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_imm_evex(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
            "vpbroadcastd $dst,$dst\t! replicate8I" %}
  ins_encode %{
    int vector_len = 1;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_imm_evex(vecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate16I($con)\n\t"
            "vpbroadcastd $dst,$dst\t! replicate16I" %}
  ins_encode %{
    int vector_len = 2;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_zero_evex(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateI zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate16I zero" %}
  ins_encode %{
    // 512-bit vpxor is an EVEX (AVX512F) instruction, available here since UseAVX > 2.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}
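// On 32-bit VMs (!_LP64) a long occupies a register pair, so the long
// replicates below first assemble the 64-bit lane in an XMM register:
// movdl the low half, movdl the high half into tmp, punpckldq to join them,
// then broadcast.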
// Replicate long (8 byte) scalar to be vector
#ifdef _LP64
instruct Repl4L_evex(vecY dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL src));
  format %{ "evpbroadcastq $dst,$src\t! replicate4L" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_evex(vecZ dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL src));
  format %{ "evpbroadcastq $dst,$src\t! replicate8L" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
instruct Repl4L_evex(vecY dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "vpbroadcastq $dst,$dst\t! replicate4L" %}
  ins_encode %{
    int vector_len = 1;
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_evex(legVecZ dst, eRegL src, legVecZ tmp) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "vpbroadcastq $dst,$dst\t! replicate8L" %}
  ins_encode %{
    int vector_len = 2;
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64

instruct Repl4L_imm_evex(vecY dst, immL con) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastq $dst,$dst\t! replicate4L" %}
  ins_encode %{
    int vector_len = 1;
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_imm_evex(vecZ dst, immL con) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastq $dst,$dst\t! replicate8L" %}
  ins_encode %{
    int vector_len = 2;
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct Repl2L_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "vpbroadcastq $dst,$mem\t! replicate2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4L_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "vpbroadcastq $dst,$mem\t! replicate4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "vpbroadcastq $dst,$mem\t! replicate8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_zero_evex(vecZ dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate8L zero" %}
  ins_encode %{
    // 512-bit vpxor is an EVEX (AVX512F) instruction, available here since UseAVX > 2.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}
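// vbroadcastss/vbroadcastsd accept a memory source from AVX1 on, while the
// register-to-register forms require AVX2; every form in this EVEX section is
// additionally guarded by UseAVX > 2.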
instruct Repl8F_evex(vecY dst, regF src) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF src));
  format %{ "vpbroadcastss $dst,$src\t! replicate8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8F_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "vbroadcastss $dst,$mem\t! replicate8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16F_evex(vecZ dst, regF src) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateF src));
  format %{ "vpbroadcastss $dst,$src\t! replicate16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16F_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "vbroadcastss $dst,$mem\t! replicate16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl2F_zero_evex(vecD dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate2F zero" %}
  ins_encode %{
    // Use vpxor in place of vxorps: under EVEX, vxorps requires AVX512DQ, and this is a 512-bit operation.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F_zero_evex(vecX dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate4F zero" %}
  ins_encode %{
    // Use vpxor in place of vxorps: under EVEX, vxorps requires AVX512DQ, and this is a 512-bit operation.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8F_zero_evex(vecY dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate8F zero" %}
  ins_encode %{
    // Use vpxor in place of vxorps: under EVEX, vxorps requires AVX512DQ, and this is a 512-bit operation.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16F_zero_evex(vecZ dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate16F zero" %}
  ins_encode %{
    // Use vpxor in place of vxorps: under EVEX, vxorps requires AVX512DQ, and this is a 512-bit operation.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4D_evex(vecY dst, regD src) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD src));
  format %{ "vpbroadcastsd $dst,$src\t! replicate4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4D_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "vbroadcastsd $dst,$mem\t! replicate4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct Repl8D_evex(vecZ dst, regD src) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateD src));
  format %{ "vpbroadcastsd $dst,$src\t! replicate8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8D_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "vbroadcastsd $dst,$mem\t! replicate8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl2D_zero_evex(vecX dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX > 2);
  match(Set dst (ReplicateD zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate2D zero" %}
  ins_encode %{
    // Use vpxor in place of vxorpd: under EVEX, vxorpd requires AVX512DQ, and this is a 512-bit operation.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4D_zero_evex(vecY dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2);
  match(Set dst (ReplicateD zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate4D zero" %}
  ins_encode %{
    // Use vpxor in place of vxorpd: under EVEX, vxorpd requires AVX512DQ, and this is a 512-bit operation.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8D_zero_evex(vecZ dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateD zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate8D zero" %}
  ins_encode %{
    // Use vpxor in place of vxorpd: under EVEX, vxorpd requires AVX512DQ, and this is a 512-bit operation.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// ====================REDUCTION ARITHMETIC=======================================
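// The integer reductions below all follow the same halving ladder: fold the
// upper half of the vector into the lower half (vextracti*_high + vpadd*),
// then pshufd 0xE to bring the upper 64 bits down, pshufd 0x1 to bring the
// remaining upper dword down, and finally combine with the scalar input src1
// and movd the result to a GPR. The scalar-SSE forms use phaddd (SSSE3, hence
// UseSSE > 2), and the supports_avxonly() forms (AVX1/AVX2 without AVX-512)
// use vphaddd instead of the extract-and-add ladder.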
instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{
  predicate(UseSSE > 2 && UseAVX == 0);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp2, TEMP tmp);
  format %{ "movdqu $tmp2,$src2\n\t"
            "phaddd $tmp2,$tmp2\n\t"
            "movd $tmp,$src1\n\t"
            "paddd $tmp,$tmp2\n\t"
            "movd $dst,$tmp\t! add reduction2I" %}
  ins_encode %{
    __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{
  predicate(VM_Version::supports_avxonly());
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vphaddd $tmp,$src2,$src2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! add reduction2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0x1\n\t"
            "vpaddd $tmp,$src2,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! add reduction2I" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{
  predicate(UseSSE > 2 && UseAVX == 0);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "movdqu $tmp,$src2\n\t"
            "phaddd $tmp,$tmp\n\t"
            "phaddd $tmp,$tmp\n\t"
            "movd $tmp2,$src1\n\t"
            "paddd $tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! add reduction4I" %}
  ins_encode %{
    __ movdqu($tmp$$XMMRegister, $src2$$XMMRegister);
    __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister);
    __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ paddd($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{
  predicate(VM_Version::supports_avxonly());
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vphaddd $tmp,$src2,$src2\n\t"
            "vphaddd $tmp,$tmp,$tmp\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! add reduction4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "vpaddd $tmp,$src2,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! add reduction4I" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, vecY tmp, vecY tmp2) %{
  predicate(VM_Version::supports_avxonly());
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vphaddd $tmp,$src2,$src2\n\t"
            "vphaddd $tmp,$tmp,$tmp2\n\t"
            "vextracti128_high $tmp2,$tmp\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! add reduction8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ vextracti128_high($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, vecY tmp, vecY tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpaddd $tmp,$tmp,$src2\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! add reduction8I" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, legVecZ src2, legVecZ tmp, legVecZ tmp2, legVecZ tmp3) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vextracti64x4_high $tmp3,$src2\n\t"
            "vpaddd $tmp3,$tmp3,$src2\n\t"
            "vextracti128_high $tmp,$tmp3\n\t"
            "vpaddd $tmp,$tmp,$tmp3\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! add reduction16I" %}
  ins_encode %{
    __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister);
    __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
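// movdq moves 64 bits directly between a general-purpose register and an XMM
// register, which only exists on 64-bit; hence the long add reductions are
// confined to #ifdef _LP64.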
#ifdef _LP64
instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, vecX tmp, vecX tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "vpaddq $tmp,$src2,$tmp2\n\t"
            "movdq $tmp2,$src1\n\t"
            "vpaddq $tmp2,$tmp,$tmp2\n\t"
            "movdq $dst,$tmp2\t! add reduction2L" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpaddq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdq($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, vecY tmp, vecY tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpaddq $tmp2,$tmp,$src2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "movdq $tmp,$src1\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "movdq $dst,$tmp2\t! add reduction4L" %}
  ins_encode %{
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti64x4_high $tmp2,$src2\n\t"
            "vpaddq $tmp2,$tmp2,$src2\n\t"
            "vextracti128_high $tmp,$tmp2\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "movdq $tmp,$src1\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "movdq $dst,$tmp2\t! add reduction8L" %}
  ins_encode %{
    __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif
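// The floating-point reductions accumulate lane by lane with scalar
// addss/addsd (and mulss/mulsd below) in index order rather than pairwise,
// keeping the result bit-identical to the equivalent sequential scalar loop.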
instruct rsadd2F_reduction_reg(regF dst, vecD src2, vecD tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (AddReductionVF dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "addss $dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "addss $dst,$tmp\t! add reduction2F" %}
  ins_encode %{
    __ addss($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd2F_reduction_reg(regF dst, vecD src2, vecD tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVF dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "vaddss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\t! add reduction2F" %}
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsadd4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (AddReductionVF dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "addss $dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "addss $dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "addss $dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "addss $dst,$tmp\t! add reduction4F" %}
  ins_encode %{
    __ addss($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVF dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vaddss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\t! add reduction4F" %}
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
instruct radd8F_reduction_reg(regF dst, vecY src2, vecY tmp, vecY tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVF dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vaddss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "vextractf128_high $tmp2,$src2\n\t"
            "vaddss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\t! add reduction8F" %}
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
instruct radd16F_reduction_reg(regF dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVF dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vaddss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x1\n\t"
            "vaddss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x2\n\t"
            "vaddss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x3\n\t"
            "vaddss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\t! add reduction16F" %}
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsadd2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (AddReductionVD dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "addsd $dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "addsd $dst,$tmp\t! add reduction2D" %}
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ addsd($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVD dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vaddsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vaddsd $dst,$dst,$tmp\t! add reduction2D" %}
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
instruct rvadd4D_reduction_reg(regD dst, vecY src2, vecX tmp, vecX tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vaddsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vaddsd $dst,$dst,$tmp\n\t"
            "vextractf128 $tmp2,$src2,0x1\n\t"
            "vaddsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vaddsd $dst,$dst,$tmp\t! add reduction4D" %}
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd8D_reduction_reg(regD dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vaddsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vaddsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x1\n\t"
            "vaddsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vaddsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x2\n\t"
            "vaddsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vaddsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x3\n\t"
            "vaddsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vaddsd $dst,$dst,$tmp\t! add reduction8D" %}
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
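// Multiply reductions use the same ladder with pmulld/vpmulld; pmulld is an
// SSE4.1 instruction, hence the UseSSE > 3 guard on the scalar-SSE forms.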
instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{
  predicate(UseSSE > 3 && UseAVX == 0);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0x1\n\t"
            "pmulld $tmp2,$src2\n\t"
            "movd $tmp,$src1\n\t"
            "pmulld $tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! mul reduction2I" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0x1\n\t"
            "vpmulld $tmp,$src2,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpmulld $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! mul reduction2I" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{
  predicate(UseSSE > 3 && UseAVX == 0);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "pmulld $tmp2,$src2\n\t"
            "pshufd $tmp,$tmp2,0x1\n\t"
            "pmulld $tmp2,$tmp\n\t"
            "movd $tmp,$src1\n\t"
            "pmulld $tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! mul reduction4I" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1);
    __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "vpmulld $tmp,$src2,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpmulld $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpmulld $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! mul reduction4I" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, vecY tmp, vecY tmp2) %{
  predicate(UseAVX > 1);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpmulld $tmp,$tmp,$src2\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpmulld $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpmulld $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpmulld $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! mul reduction8I" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, legVecZ src2, legVecZ tmp, legVecZ tmp2, legVecZ tmp3) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vextracti64x4_high $tmp3,$src2\n\t"
            "vpmulld $tmp3,$tmp3,$src2\n\t"
            "vextracti128_high $tmp,$tmp3\n\t"
            "vpmulld $tmp,$tmp,$tmp3\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpmulld $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpmulld $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpmulld $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! mul reduction16I" %}
  ins_encode %{
    __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister);
    __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
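// vpmullq (64x64->64-bit element multiply) is an AVX512DQ instruction, so the
// long multiply reductions below require VM_Version::supports_avx512dq().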

#ifdef _LP64
instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, vecX tmp, vecX tmp2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "vpmullq $tmp,$src2,$tmp2\n\t"
            "movdq $tmp2,$src1\n\t"
            "vpmullq $tmp2,$tmp,$tmp2\n\t"
            "movdq $dst,$tmp2\t! mul reduction2L" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpmullq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdq($tmp2$$XMMRegister, $src1$$Register);
    __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, vecY tmp, vecY tmp2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpmullq $tmp2,$tmp,$src2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vpmullq $tmp2,$tmp2,$tmp\n\t"
            "movdq $tmp,$src1\n\t"
            "vpmullq $tmp2,$tmp2,$tmp\n\t"
            "movdq $dst,$tmp2\t! mul reduction4L" %}
  ins_encode %{
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti64x4_high $tmp2,$src2\n\t"
            "vpmullq $tmp2,$tmp2,$src2\n\t"
            "vextracti128_high $tmp,$tmp2\n\t"
            "vpmullq $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vpmullq $tmp2,$tmp2,$tmp\n\t"
            "movdq $tmp,$src1\n\t"
            "vpmullq $tmp2,$tmp2,$tmp\n\t"
            "movdq $dst,$tmp2\t! mul reduction8L" %}
  ins_encode %{
    __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif
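
// The float/double mul-reductions below multiply the lanes into the
// scalar accumulator strictly in lane order (mulss/mulsd chains) instead
// of folding pairwise in vector registers; FP multiplication is not
// associative, so this keeps the rounding identical to the scalar loop.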

instruct rsmul2F_reduction(regF dst, vecD src2, vecD tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "mulss $dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "mulss $dst,$tmp\t! mul reduction2F" %}
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul2F_reduction_reg(regF dst, vecD src2, vecD tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vmulss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\t! mul reduction2F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsmul4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "mulss $dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "mulss $dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "mulss $dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "mulss $dst,$tmp\t! mul reduction4F" %}
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vmulss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\t! mul reduction4F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul8F_reduction_reg(regF dst, vecY src2, vecY tmp, vecY tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "vextractf128_high $tmp2,$src2\n\t"
            "vmulss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\t! mul reduction8F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
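
// pshufd's immediate is four 2-bit lane selectors; bits 1:0 choose the
// source dword copied into lane 0.  The scalar chains here only consume
// lane 0 of $tmp, so immediates 0x01, 0x02 and 0x03 suffice to route
// source lane 1, 2 or 3 into position for the next multiply.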

instruct rvmul16F_reduction_reg(regF dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x1\n\t"
            "vmulss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x2\n\t"
            "vmulss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x3\n\t"
            "vmulss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\t! mul reduction16F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsmul2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "mulsd $dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "mulsd $dst,$tmp\t! mul reduction2D" %}
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vmulsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\t! mul reduction2D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul4D_reduction_reg(regD dst, vecY src2, vecY tmp, vecY tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf128_high $tmp2,$src2\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\t! mul reduction4D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul8D_reduction_reg(regD dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x1\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x2\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x3\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\t! mul reduction8D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// ====================VECTOR ARITHMETIC=======================================
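
// Most operations below come in up to three flavors per vector size:
//   - an SSE form (UseAVX == 0): two-operand, dst op= src
//   - an AVX register form (UseAVX > 0): three-operand, dst = src1 op src2
//   - an AVX memory form: the (LoadVector mem) input is folded into the
//     instruction's memory operand instead of occupying a register.
// The vector_len value passed to the assembler selects the encoded width:
// 0 = 128-bit (xmm), 1 = 256-bit (ymm), 2 = 512-bit (zmm).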

// --------------------------------- ADD --------------------------------------

// Bytes vector add
instruct vadd4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed4B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed8B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
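
// The sub-128-bit operand classes (vecS = 4 bytes, vecD = 8 bytes) still
// live in xmm registers and are encoded with vector_len 0; the instruction
// operates on all 16 bytes, but lanes beyond the logical vector length are
// don't-cares.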

instruct vadd16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed16B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
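
// The 512-bit byte and short forms are gated on supports_avx512bw():
// vpaddb/vpaddw (and vpsubb/vpsubw) with zmm operands are AVX512BW
// instructions, while the dword/qword forms below need only the AVX512F
// baseline.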

// Shorts/Chars vector add
instruct vadd2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed2S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed4S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed8S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector add
instruct vadd2I(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed2I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed4I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector add
instruct vadd2L(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL dst src));
  format %{ "paddq $dst,$src\t! add packed2L" %}
  ins_encode %{
    __ paddq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector add
instruct vadd2F(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed2F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed4F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector add
instruct vadd2D(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD dst src));
  format %{ "addpd $dst,$src\t! add packed2D" %}
  ins_encode %{
    __ addpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- SUB --------------------------------------
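
// The SUB block mirrors the ADD block above instruct-for-instruct, with
// psub*/vpsub*/subp*/vsubp* in place of the add mnemonics.  Unlike ADD,
// the three-operand forms are not commutable: dst = src1 - src2.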

// Bytes vector sub
instruct vsub4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed4B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed8B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed16B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector sub
instruct vsub2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed2S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed4S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed8S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector sub
instruct vsub2I(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed2I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed4I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector sub
instruct vsub2L(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL dst src));
  format %{ "psubq $dst,$src\t! sub packed2L" %}
  ins_encode %{
    __ psubq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector sub
instruct vsub2F(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed2F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed4F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
sub packed2D" %} 7289 ins_encode %{ 7290 int vector_len = 0; 7291 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7292 %} 7293 ins_pipe( pipe_slow ); 7294 %} 7295 7296 instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{ 7297 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7298 match(Set dst (SubVD src1 src2)); 7299 format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %} 7300 ins_encode %{ 7301 int vector_len = 1; 7302 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7303 %} 7304 ins_pipe( pipe_slow ); 7305 %} 7306 7307 instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{ 7308 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7309 match(Set dst (SubVD src (LoadVector mem))); 7310 format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %} 7311 ins_encode %{ 7312 int vector_len = 1; 7313 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7314 %} 7315 ins_pipe( pipe_slow ); 7316 %} 7317 7318 instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7319 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 7320 match(Set dst (SubVD src1 src2)); 7321 format %{ "vsubpd $dst,$src1,$src2\t! sub packed8D" %} 7322 ins_encode %{ 7323 int vector_len = 2; 7324 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7325 %} 7326 ins_pipe( pipe_slow ); 7327 %} 7328 7329 instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{ 7330 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 7331 match(Set dst (SubVD src (LoadVector mem))); 7332 format %{ "vsubpd $dst,$src,$mem\t! sub packed8D" %} 7333 ins_encode %{ 7334 int vector_len = 2; 7335 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7336 %} 7337 ins_pipe( pipe_slow ); 7338 %} 7339 7340 // --------------------------------- MUL -------------------------------------- 7341 7342 // Byte vector mul 7343 instruct mul4B_reg(vecS dst, vecS src1, vecS src2, vecS tmp, rRegI scratch) %{ 7344 predicate(UseSSE > 3 && n->as_Vector()->length() == 4); 7345 match(Set dst (MulVB src1 src2)); 7346 effect(TEMP dst, TEMP tmp, TEMP scratch); 7347 format %{"pmovsxbw $tmp,$src1\n\t" 7348 "pmovsxbw $dst,$src2\n\t" 7349 "pmullw $tmp,$dst\n\t" 7350 "movdqu $dst,[0x00ff00ff0x00ff00ff]\n\t" 7351 "pand $dst,$tmp\n\t" 7352 "packuswb $dst,$dst\t! mul packed4B" %} 7353 ins_encode %{ 7354 __ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister); 7355 __ pmovsxbw($dst$$XMMRegister, $src2$$XMMRegister); 7356 __ pmullw($tmp$$XMMRegister, $dst$$XMMRegister); 7357 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 7358 __ pand($dst$$XMMRegister, $tmp$$XMMRegister); 7359 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 7360 %} 7361 ins_pipe( pipe_slow ); 7362 %} 7363 7364 instruct mul8B_reg(vecD dst, vecD src1, vecD src2, vecD tmp, rRegI scratch) %{ 7365 predicate(UseSSE > 3 && n->as_Vector()->length() == 8); 7366 match(Set dst (MulVB src1 src2)); 7367 effect(TEMP dst, TEMP tmp, TEMP scratch); 7368 format %{"pmovsxbw $tmp,$src1\n\t" 7369 "pmovsxbw $dst,$src2\n\t" 7370 "pmullw $tmp,$dst\n\t" 7371 "movdqu $dst,[0x00ff00ff0x00ff00ff]\n\t" 7372 "pand $dst,$tmp\n\t" 7373 "packuswb $dst,$dst\t! 
mul packed8B" %} 7374 ins_encode %{ 7375 __ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister); 7376 __ pmovsxbw($dst$$XMMRegister, $src2$$XMMRegister); 7377 __ pmullw($tmp$$XMMRegister, $dst$$XMMRegister); 7378 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 7379 __ pand($dst$$XMMRegister, $tmp$$XMMRegister); 7380 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 7381 %} 7382 ins_pipe( pipe_slow ); 7383 %} 7384 7385 instruct mul16B_reg(vecX dst, vecX src1, vecX src2, vecX tmp1, vecX tmp2, rRegI scratch) %{ 7386 predicate(UseSSE > 3 && n->as_Vector()->length() == 16); 7387 match(Set dst (MulVB src1 src2)); 7388 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); 7389 format %{"pmovsxbw $tmp1,$src1\n\t" 7390 "pmovsxbw $tmp2,$src2\n\t" 7391 "pmullw $tmp1,$tmp2\n\t" 7392 "pshufd $tmp2,$src1,0xEE\n\t" 7393 "pshufd $dst,$src2,0xEE\n\t" 7394 "pmovsxbw $tmp2,$tmp2\n\t" 7395 "pmovsxbw $dst,$dst\n\t" 7396 "pmullw $tmp2,$dst\n\t" 7397 "movdqu $dst,[0x00ff00ff0x00ff00ff]\n\t" 7398 "pand $tmp2,$dst\n\t" 7399 "pand $dst,$tmp1\n\t" 7400 "packuswb $dst,$tmp2\t! mul packed16B" %} 7401 ins_encode %{ 7402 __ pmovsxbw($tmp1$$XMMRegister, $src1$$XMMRegister); 7403 __ pmovsxbw($tmp2$$XMMRegister, $src2$$XMMRegister); 7404 __ pmullw($tmp1$$XMMRegister, $tmp2$$XMMRegister); 7405 __ pshufd($tmp2$$XMMRegister, $src1$$XMMRegister, 0xEE); 7406 __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xEE); 7407 __ pmovsxbw($tmp2$$XMMRegister, $tmp2$$XMMRegister); 7408 __ pmovsxbw($dst$$XMMRegister, $dst$$XMMRegister); 7409 __ pmullw($tmp2$$XMMRegister, $dst$$XMMRegister); 7410 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 7411 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister); 7412 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister); 7413 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister); 7414 %} 7415 ins_pipe( pipe_slow ); 7416 %} 7417 7418 instruct vmul16B_reg_avx(vecX dst, vecX src1, vecX src2, vecX tmp, rRegI scratch) %{ 7419 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 7420 match(Set dst (MulVB src1 src2)); 7421 effect(TEMP dst, TEMP tmp, TEMP scratch); 7422 format %{"vpmovsxbw $tmp,$src1\n\t" 7423 "vpmovsxbw $dst,$src2\n\t" 7424 "vpmullw $tmp,$tmp,$dst\n\t" 7425 "vmovdqu $dst,[0x00ff00ff0x00ff00ff]\n\t" 7426 "vpand $dst,$dst,$tmp\n\t" 7427 "vextracti128_high $tmp,$dst\n\t" 7428 "vpackuswb $dst,$dst,$dst\n\t! 
mul packed16B" %} 7429 ins_encode %{ 7430 int vector_len = 1; 7431 __ vpmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister, vector_len); 7432 __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vector_len); 7433 __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, vector_len); 7434 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 7435 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 7436 __ vextracti128_high($tmp$$XMMRegister, $dst$$XMMRegister); 7437 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, 0); 7438 %} 7439 ins_pipe( pipe_slow ); 7440 %} 7441 7442 instruct vmul32B_reg_avx(vecY dst, vecY src1, vecY src2, vecY tmp1, vecY tmp2, rRegI scratch) %{ 7443 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 7444 match(Set dst (MulVB src1 src2)); 7445 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); 7446 format %{"vextracti128_high $tmp1,$src1\n\t" 7447 "vextracti128_high $dst,$src2\n\t" 7448 "vpmovsxbw $tmp1,$tmp1\n\t" 7449 "vpmovsxbw $dst,$dst\n\t" 7450 "vpmullw $tmp1,$tmp1,$dst\n\t" 7451 "vpmovsxbw $tmp2,$src1\n\t" 7452 "vpmovsxbw $dst,$src2\n\t" 7453 "vpmullw $tmp2,$tmp2,$dst\n\t" 7454 "vmovdqu $dst, [0x00ff00ff0x00ff00ff]\n\t" 7455 "vpbroadcastd $dst, $dst\n\t" 7456 "vpand $tmp1,$tmp1,$dst\n\t" 7457 "vpand $dst,$dst,$tmp2\n\t" 7458 "vpackuswb $dst,$dst,$tmp1\n\t" 7459 "vpermq $dst, $dst, 0xD8\t! mul packed32B" %} 7460 ins_encode %{ 7461 int vector_len = 1; 7462 __ vextracti128_high($tmp1$$XMMRegister, $src1$$XMMRegister); 7463 __ vextracti128_high($dst$$XMMRegister, $src2$$XMMRegister); 7464 __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len); 7465 __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 7466 __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len); 7467 __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vector_len); 7468 __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vector_len); 7469 __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len); 7470 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 7471 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 7472 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len); 7473 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vector_len); 7474 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp1$$XMMRegister, vector_len); 7475 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vector_len); 7476 %} 7477 ins_pipe( pipe_slow ); 7478 %} 7479 7480 instruct vmul64B_reg_avx(vecZ dst, vecZ src1, vecZ src2, vecZ tmp1, vecZ tmp2, rRegI scratch) %{ 7481 predicate(UseAVX > 2 && n->as_Vector()->length() == 64); 7482 match(Set dst (MulVB src1 src2)); 7483 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); 7484 format %{"vextracti64x4_high $tmp1,$src1\n\t" 7485 "vextracti64x4_high $dst,$src2\n\t" 7486 "vpmovsxbw $tmp1,$tmp1\n\t" 7487 "vpmovsxbw $dst,$dst\n\t" 7488 "vpmullw $tmp1,$tmp1,$dst\n\t" 7489 "vpmovsxbw $tmp2,$src1\n\t" 7490 "vpmovsxbw $dst,$src2\n\t" 7491 "vpmullw $tmp2,$tmp2,$dst\n\t" 7492 "vmovdqu $dst, [0x00ff00ff0x00ff00ff]\n\t" 7493 "vpbroadcastd $dst, $dst\n\t" 7494 "vpand $tmp1,$tmp1,$dst\n\t" 7495 "vpand $tmp2,$tmp2,$dst\n\t" 7496 "vpackuswb $dst,$tmp1,$tmp2\n\t" 7497 "evmovdquq $tmp2,[0x0604020007050301]\n\t" 7498 "vpermq $dst,$tmp2,$dst,0x01\t! 
mul packed64B" %} 7499 7500 ins_encode %{ 7501 int vector_len = 2; 7502 __ vextracti64x4_high($tmp1$$XMMRegister, $src1$$XMMRegister); 7503 __ vextracti64x4_high($dst$$XMMRegister, $src2$$XMMRegister); 7504 __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len); 7505 __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 7506 __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len); 7507 __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vector_len); 7508 __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vector_len); 7509 __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len); 7510 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 7511 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 7512 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len); 7513 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len); 7514 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vector_len); 7515 __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vector_len, $scratch$$Register); 7516 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len); 7517 7518 %} 7519 ins_pipe( pipe_slow ); 7520 %} 7521 7522 // Shorts/Chars vector mul 7523 instruct vmul2S(vecS dst, vecS src) %{ 7524 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 7525 match(Set dst (MulVS dst src)); 7526 format %{ "pmullw $dst,$src\t! mul packed2S" %} 7527 ins_encode %{ 7528 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 7529 %} 7530 ins_pipe( pipe_slow ); 7531 %} 7532 7533 instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{ 7534 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7535 match(Set dst (MulVS src1 src2)); 7536 format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} 7537 ins_encode %{ 7538 int vector_len = 0; 7539 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7540 %} 7541 ins_pipe( pipe_slow ); 7542 %} 7543 7544 instruct vmul2S_mem(vecS dst, vecS src, memory mem) %{ 7545 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7546 match(Set dst (MulVS src (LoadVector mem))); 7547 format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} 7548 ins_encode %{ 7549 int vector_len = 0; 7550 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7551 %} 7552 ins_pipe( pipe_slow ); 7553 %} 7554 7555 instruct vmul4S(vecD dst, vecD src) %{ 7556 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 7557 match(Set dst (MulVS dst src)); 7558 format %{ "pmullw $dst,$src\t! mul packed4S" %} 7559 ins_encode %{ 7560 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 7561 %} 7562 ins_pipe( pipe_slow ); 7563 %} 7564 7565 instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{ 7566 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7567 match(Set dst (MulVS src1 src2)); 7568 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 7569 ins_encode %{ 7570 int vector_len = 0; 7571 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7572 %} 7573 ins_pipe( pipe_slow ); 7574 %} 7575 7576 instruct vmul4S_mem(vecD dst, vecD src, memory mem) %{ 7577 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7578 match(Set dst (MulVS src (LoadVector mem))); 7579 format %{ "vpmullw $dst,$src,$mem\t! 
mul packed4S" %} 7580 ins_encode %{ 7581 int vector_len = 0; 7582 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7583 %} 7584 ins_pipe( pipe_slow ); 7585 %} 7586 7587 instruct vmul8S(vecX dst, vecX src) %{ 7588 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 7589 match(Set dst (MulVS dst src)); 7590 format %{ "pmullw $dst,$src\t! mul packed8S" %} 7591 ins_encode %{ 7592 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 7593 %} 7594 ins_pipe( pipe_slow ); 7595 %} 7596 7597 instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{ 7598 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7599 match(Set dst (MulVS src1 src2)); 7600 format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} 7601 ins_encode %{ 7602 int vector_len = 0; 7603 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7604 %} 7605 ins_pipe( pipe_slow ); 7606 %} 7607 7608 instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{ 7609 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7610 match(Set dst (MulVS src (LoadVector mem))); 7611 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} 7612 ins_encode %{ 7613 int vector_len = 0; 7614 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7615 %} 7616 ins_pipe( pipe_slow ); 7617 %} 7618 7619 instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{ 7620 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 7621 match(Set dst (MulVS src1 src2)); 7622 format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} 7623 ins_encode %{ 7624 int vector_len = 1; 7625 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7626 %} 7627 ins_pipe( pipe_slow ); 7628 %} 7629 7630 instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{ 7631 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 7632 match(Set dst (MulVS src (LoadVector mem))); 7633 format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} 7634 ins_encode %{ 7635 int vector_len = 1; 7636 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7637 %} 7638 ins_pipe( pipe_slow ); 7639 %} 7640 7641 instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7642 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 7643 match(Set dst (MulVS src1 src2)); 7644 format %{ "vpmullw $dst,$src1,$src2\t! mul packed32S" %} 7645 ins_encode %{ 7646 int vector_len = 2; 7647 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7648 %} 7649 ins_pipe( pipe_slow ); 7650 %} 7651 7652 instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{ 7653 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 7654 match(Set dst (MulVS src (LoadVector mem))); 7655 format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %} 7656 ins_encode %{ 7657 int vector_len = 2; 7658 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7659 %} 7660 ins_pipe( pipe_slow ); 7661 %} 7662 7663 // Integers vector mul (sse4_1) 7664 instruct vmul2I(vecD dst, vecD src) %{ 7665 predicate(UseSSE > 3 && n->as_Vector()->length() == 2); 7666 match(Set dst (MulVI dst src)); 7667 format %{ "pmulld $dst,$src\t! 
mul packed2I" %} 7668 ins_encode %{ 7669 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 7670 %} 7671 ins_pipe( pipe_slow ); 7672 %} 7673 7674 instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{ 7675 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7676 match(Set dst (MulVI src1 src2)); 7677 format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %} 7678 ins_encode %{ 7679 int vector_len = 0; 7680 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7681 %} 7682 ins_pipe( pipe_slow ); 7683 %} 7684 7685 instruct vmul2I_mem(vecD dst, vecD src, memory mem) %{ 7686 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7687 match(Set dst (MulVI src (LoadVector mem))); 7688 format %{ "vpmulld $dst,$src,$mem\t! mul packed2I" %} 7689 ins_encode %{ 7690 int vector_len = 0; 7691 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7692 %} 7693 ins_pipe( pipe_slow ); 7694 %} 7695 7696 instruct vmul4I(vecX dst, vecX src) %{ 7697 predicate(UseSSE > 3 && n->as_Vector()->length() == 4); 7698 match(Set dst (MulVI dst src)); 7699 format %{ "pmulld $dst,$src\t! mul packed4I" %} 7700 ins_encode %{ 7701 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 7702 %} 7703 ins_pipe( pipe_slow ); 7704 %} 7705 7706 instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{ 7707 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7708 match(Set dst (MulVI src1 src2)); 7709 format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %} 7710 ins_encode %{ 7711 int vector_len = 0; 7712 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7713 %} 7714 ins_pipe( pipe_slow ); 7715 %} 7716 7717 instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{ 7718 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7719 match(Set dst (MulVI src (LoadVector mem))); 7720 format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %} 7721 ins_encode %{ 7722 int vector_len = 0; 7723 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7724 %} 7725 ins_pipe( pipe_slow ); 7726 %} 7727 7728 instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{ 7729 predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq()); 7730 match(Set dst (MulVL src1 src2)); 7731 format %{ "vpmullq $dst,$src1,$src2\t! mul packed2L" %} 7732 ins_encode %{ 7733 int vector_len = 0; 7734 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7735 %} 7736 ins_pipe( pipe_slow ); 7737 %} 7738 7739 instruct vmul2L_mem(vecX dst, vecX src, memory mem) %{ 7740 predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq()); 7741 match(Set dst (MulVL src (LoadVector mem))); 7742 format %{ "vpmullq $dst,$src,$mem\t! mul packed2L" %} 7743 ins_encode %{ 7744 int vector_len = 0; 7745 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7746 %} 7747 ins_pipe( pipe_slow ); 7748 %} 7749 7750 instruct vmul4L_reg(vecY dst, vecY src1, vecY src2) %{ 7751 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq()); 7752 match(Set dst (MulVL src1 src2)); 7753 format %{ "vpmullq $dst,$src1,$src2\t! 
mul packed4L" %} 7754 ins_encode %{ 7755 int vector_len = 1; 7756 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7757 %} 7758 ins_pipe( pipe_slow ); 7759 %} 7760 7761 instruct vmul4L_mem(vecY dst, vecY src, memory mem) %{ 7762 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq()); 7763 match(Set dst (MulVL src (LoadVector mem))); 7764 format %{ "vpmullq $dst,$src,$mem\t! mul packed4L" %} 7765 ins_encode %{ 7766 int vector_len = 1; 7767 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7768 %} 7769 ins_pipe( pipe_slow ); 7770 %} 7771 7772 instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7773 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq()); 7774 match(Set dst (MulVL src1 src2)); 7775 format %{ "vpmullq $dst,$src1,$src2\t! mul packed8L" %} 7776 ins_encode %{ 7777 int vector_len = 2; 7778 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7779 %} 7780 ins_pipe( pipe_slow ); 7781 %} 7782 7783 instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{ 7784 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq()); 7785 match(Set dst (MulVL src (LoadVector mem))); 7786 format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %} 7787 ins_encode %{ 7788 int vector_len = 2; 7789 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7790 %} 7791 ins_pipe( pipe_slow ); 7792 %} 7793 7794 instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{ 7795 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 7796 match(Set dst (MulVI src1 src2)); 7797 format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %} 7798 ins_encode %{ 7799 int vector_len = 1; 7800 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7801 %} 7802 ins_pipe( pipe_slow ); 7803 %} 7804 7805 instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{ 7806 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 7807 match(Set dst (MulVI src (LoadVector mem))); 7808 format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %} 7809 ins_encode %{ 7810 int vector_len = 1; 7811 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7812 %} 7813 ins_pipe( pipe_slow ); 7814 %} 7815 7816 instruct vmul16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7817 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7818 match(Set dst (MulVI src1 src2)); 7819 format %{ "vpmulld $dst,$src1,$src2\t! mul packed16I" %} 7820 ins_encode %{ 7821 int vector_len = 2; 7822 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7823 %} 7824 ins_pipe( pipe_slow ); 7825 %} 7826 7827 instruct vmul16I_mem(vecZ dst, vecZ src, memory mem) %{ 7828 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7829 match(Set dst (MulVI src (LoadVector mem))); 7830 format %{ "vpmulld $dst,$src,$mem\t! mul packed16I" %} 7831 ins_encode %{ 7832 int vector_len = 2; 7833 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7834 %} 7835 ins_pipe( pipe_slow ); 7836 %} 7837 7838 // Floats vector mul 7839 instruct vmul2F(vecD dst, vecD src) %{ 7840 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 7841 match(Set dst (MulVF dst src)); 7842 format %{ "mulps $dst,$src\t! 
mul packed2F" %} 7843 ins_encode %{ 7844 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 7845 %} 7846 ins_pipe( pipe_slow ); 7847 %} 7848 7849 instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{ 7850 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7851 match(Set dst (MulVF src1 src2)); 7852 format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %} 7853 ins_encode %{ 7854 int vector_len = 0; 7855 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7856 %} 7857 ins_pipe( pipe_slow ); 7858 %} 7859 7860 instruct vmul2F_mem(vecD dst, vecD src, memory mem) %{ 7861 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7862 match(Set dst (MulVF src (LoadVector mem))); 7863 format %{ "vmulps $dst,$src,$mem\t! mul packed2F" %} 7864 ins_encode %{ 7865 int vector_len = 0; 7866 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7867 %} 7868 ins_pipe( pipe_slow ); 7869 %} 7870 7871 instruct vmul4F(vecX dst, vecX src) %{ 7872 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 7873 match(Set dst (MulVF dst src)); 7874 format %{ "mulps $dst,$src\t! mul packed4F" %} 7875 ins_encode %{ 7876 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 7877 %} 7878 ins_pipe( pipe_slow ); 7879 %} 7880 7881 instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{ 7882 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7883 match(Set dst (MulVF src1 src2)); 7884 format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %} 7885 ins_encode %{ 7886 int vector_len = 0; 7887 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7888 %} 7889 ins_pipe( pipe_slow ); 7890 %} 7891 7892 instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{ 7893 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7894 match(Set dst (MulVF src (LoadVector mem))); 7895 format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %} 7896 ins_encode %{ 7897 int vector_len = 0; 7898 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7899 %} 7900 ins_pipe( pipe_slow ); 7901 %} 7902 7903 instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{ 7904 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7905 match(Set dst (MulVF src1 src2)); 7906 format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %} 7907 ins_encode %{ 7908 int vector_len = 1; 7909 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7910 %} 7911 ins_pipe( pipe_slow ); 7912 %} 7913 7914 instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{ 7915 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7916 match(Set dst (MulVF src (LoadVector mem))); 7917 format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %} 7918 ins_encode %{ 7919 int vector_len = 1; 7920 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7921 %} 7922 ins_pipe( pipe_slow ); 7923 %} 7924 7925 instruct vmul16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7926 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7927 match(Set dst (MulVF src1 src2)); 7928 format %{ "vmulps $dst,$src1,$src2\t! mul packed16F" %} 7929 ins_encode %{ 7930 int vector_len = 2; 7931 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7932 %} 7933 ins_pipe( pipe_slow ); 7934 %} 7935 7936 instruct vmul16F_mem(vecZ dst, vecZ src, memory mem) %{ 7937 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7938 match(Set dst (MulVF src (LoadVector mem))); 7939 format %{ "vmulps $dst,$src,$mem\t! 
mul packed16F" %} 7940 ins_encode %{ 7941 int vector_len = 2; 7942 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7943 %} 7944 ins_pipe( pipe_slow ); 7945 %} 7946 7947 // Doubles vector mul 7948 instruct vmul2D(vecX dst, vecX src) %{ 7949 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 7950 match(Set dst (MulVD dst src)); 7951 format %{ "mulpd $dst,$src\t! mul packed2D" %} 7952 ins_encode %{ 7953 __ mulpd($dst$$XMMRegister, $src$$XMMRegister); 7954 %} 7955 ins_pipe( pipe_slow ); 7956 %} 7957 7958 instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{ 7959 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7960 match(Set dst (MulVD src1 src2)); 7961 format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %} 7962 ins_encode %{ 7963 int vector_len = 0; 7964 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7965 %} 7966 ins_pipe( pipe_slow ); 7967 %} 7968 7969 instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{ 7970 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7971 match(Set dst (MulVD src (LoadVector mem))); 7972 format %{ "vmulpd $dst,$src,$mem\t! mul packed2D" %} 7973 ins_encode %{ 7974 int vector_len = 0; 7975 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7976 %} 7977 ins_pipe( pipe_slow ); 7978 %} 7979 7980 instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{ 7981 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7982 match(Set dst (MulVD src1 src2)); 7983 format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %} 7984 ins_encode %{ 7985 int vector_len = 1; 7986 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7987 %} 7988 ins_pipe( pipe_slow ); 7989 %} 7990 7991 instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{ 7992 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7993 match(Set dst (MulVD src (LoadVector mem))); 7994 format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %} 7995 ins_encode %{ 7996 int vector_len = 1; 7997 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7998 %} 7999 ins_pipe( pipe_slow ); 8000 %} 8001 8002 instruct vmul8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ 8003 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8004 match(Set dst (MulVD src1 src2)); 8005 format %{ "vmulpd $dst k0,$src1,$src2\t! mul packed8D" %} 8006 ins_encode %{ 8007 int vector_len = 2; 8008 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8009 %} 8010 ins_pipe( pipe_slow ); 8011 %} 8012 8013 instruct vmul8D_mem(vecZ dst, vecZ src, memory mem) %{ 8014 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8015 match(Set dst (MulVD src (LoadVector mem))); 8016 format %{ "vmulpd $dst k0,$src,$mem\t! mul packed8D" %} 8017 ins_encode %{ 8018 int vector_len = 2; 8019 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8020 %} 8021 ins_pipe( pipe_slow ); 8022 %} 8023 8024 instruct vcmov8F_reg(legVecY dst, legVecY src1, legVecY src2, immI8 cop, cmpOp_vcmppd copnd) %{ 8025 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8026 match(Set dst (CMoveVF (Binary copnd cop) (Binary src1 src2))); 8027 effect(TEMP dst, USE src1, USE src2); 8028 format %{ "cmpps.$copnd $dst, $src1, $src2 ! vcmovevf, cond=$cop\n\t" 8029 "blendvps $dst,$src1,$src2,$dst ! 
instruct vcmov8F_reg(legVecY dst, legVecY src1, legVecY src2, immI8 cop, cmpOp_vcmppd copnd) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (CMoveVF (Binary copnd cop) (Binary src1 src2)));
  effect(TEMP dst, USE src1, USE src2);
  format %{ "cmpps.$copnd $dst, $src1, $src2 ! vcmovevf, cond=$cop\n\t"
            "blendvps $dst,$src1,$src2,$dst ! vcmovevf\n\t"
  %}
  ins_encode %{
    int vector_len = 1;
    int cond = (Assembler::Condition)($copnd$$cmpcode);
    __ cmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len);
    __ blendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmov4D_reg(legVecY dst, legVecY src1, legVecY src2, immI8 cop, cmpOp_vcmppd copnd) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2)));
  effect(TEMP dst, USE src1, USE src2);
  format %{ "cmppd.$copnd $dst, $src1, $src2 ! vcmovevd, cond=$cop\n\t"
            "blendvpd $dst,$src1,$src2,$dst ! vcmovevd\n\t"
  %}
  ins_encode %{
    int vector_len = 1;
    int cond = (Assembler::Condition)($copnd$$cmpcode);
    __ cmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len);
    __ blendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- DIV --------------------------------------

// Floats vector div
instruct vdiv2F(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed2F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed4F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
div packed4D" %} 8204 ins_encode %{ 8205 int vector_len = 1; 8206 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8207 %} 8208 ins_pipe( pipe_slow ); 8209 %} 8210 8211 instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{ 8212 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8213 match(Set dst (DivVD src (LoadVector mem))); 8214 format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %} 8215 ins_encode %{ 8216 int vector_len = 1; 8217 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8218 %} 8219 ins_pipe( pipe_slow ); 8220 %} 8221 8222 instruct vdiv8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ 8223 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8224 match(Set dst (DivVD src1 src2)); 8225 format %{ "vdivpd $dst,$src1,$src2\t! div packed8D" %} 8226 ins_encode %{ 8227 int vector_len = 2; 8228 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8229 %} 8230 ins_pipe( pipe_slow ); 8231 %} 8232 8233 instruct vdiv8D_mem(vecZ dst, vecZ src, memory mem) %{ 8234 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8235 match(Set dst (DivVD src (LoadVector mem))); 8236 format %{ "vdivpd $dst,$src,$mem\t! div packed8D" %} 8237 ins_encode %{ 8238 int vector_len = 2; 8239 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8240 %} 8241 ins_pipe( pipe_slow ); 8242 %} 8243 8244 // --------------------------------- Sqrt -------------------------------------- 8245 8246 // Floating point vector sqrt 8247 instruct vsqrt2D_reg(vecX dst, vecX src) %{ 8248 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8249 match(Set dst (SqrtVD src)); 8250 format %{ "vsqrtpd $dst,$src\t! sqrt packed2D" %} 8251 ins_encode %{ 8252 int vector_len = 0; 8253 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8254 %} 8255 ins_pipe( pipe_slow ); 8256 %} 8257 8258 instruct vsqrt2D_mem(vecX dst, memory mem) %{ 8259 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8260 match(Set dst (SqrtVD (LoadVector mem))); 8261 format %{ "vsqrtpd $dst,$mem\t! sqrt packed2D" %} 8262 ins_encode %{ 8263 int vector_len = 0; 8264 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); 8265 %} 8266 ins_pipe( pipe_slow ); 8267 %} 8268 8269 instruct vsqrt4D_reg(vecY dst, vecY src) %{ 8270 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8271 match(Set dst (SqrtVD src)); 8272 format %{ "vsqrtpd $dst,$src\t! sqrt packed4D" %} 8273 ins_encode %{ 8274 int vector_len = 1; 8275 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8276 %} 8277 ins_pipe( pipe_slow ); 8278 %} 8279 8280 instruct vsqrt4D_mem(vecY dst, memory mem) %{ 8281 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8282 match(Set dst (SqrtVD (LoadVector mem))); 8283 format %{ "vsqrtpd $dst,$mem\t! sqrt packed4D" %} 8284 ins_encode %{ 8285 int vector_len = 1; 8286 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); 8287 %} 8288 ins_pipe( pipe_slow ); 8289 %} 8290 8291 instruct vsqrt8D_reg(vecZ dst, vecZ src) %{ 8292 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8293 match(Set dst (SqrtVD src)); 8294 format %{ "vsqrtpd $dst,$src\t! sqrt packed8D" %} 8295 ins_encode %{ 8296 int vector_len = 2; 8297 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8298 %} 8299 ins_pipe( pipe_slow ); 8300 %} 8301 8302 instruct vsqrt8D_mem(vecZ dst, memory mem) %{ 8303 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8304 match(Set dst (SqrtVD (LoadVector mem))); 8305 format %{ "vsqrtpd $dst,$mem\t! 
sqrt packed8D" %} 8306 ins_encode %{ 8307 int vector_len = 2; 8308 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); 8309 %} 8310 ins_pipe( pipe_slow ); 8311 %} 8312 8313 instruct vsqrt2F_reg(vecD dst, vecD src) %{ 8314 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8315 match(Set dst (SqrtVF src)); 8316 format %{ "vsqrtps $dst,$src\t! sqrt packed2F" %} 8317 ins_encode %{ 8318 int vector_len = 0; 8319 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8320 %} 8321 ins_pipe( pipe_slow ); 8322 %} 8323 8324 instruct vsqrt2F_mem(vecD dst, memory mem) %{ 8325 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8326 match(Set dst (SqrtVF (LoadVector mem))); 8327 format %{ "vsqrtps $dst,$mem\t! sqrt packed2F" %} 8328 ins_encode %{ 8329 int vector_len = 0; 8330 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len); 8331 %} 8332 ins_pipe( pipe_slow ); 8333 %} 8334 8335 instruct vsqrt4F_reg(vecX dst, vecX src) %{ 8336 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8337 match(Set dst (SqrtVF src)); 8338 format %{ "vsqrtps $dst,$src\t! sqrt packed4F" %} 8339 ins_encode %{ 8340 int vector_len = 0; 8341 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8342 %} 8343 ins_pipe( pipe_slow ); 8344 %} 8345 8346 instruct vsqrt4F_mem(vecX dst, memory mem) %{ 8347 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8348 match(Set dst (SqrtVF (LoadVector mem))); 8349 format %{ "vsqrtps $dst,$mem\t! sqrt packed4F" %} 8350 ins_encode %{ 8351 int vector_len = 0; 8352 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len); 8353 %} 8354 ins_pipe( pipe_slow ); 8355 %} 8356 8357 instruct vsqrt8F_reg(vecY dst, vecY src) %{ 8358 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8359 match(Set dst (SqrtVF src)); 8360 format %{ "vsqrtps $dst,$src\t! sqrt packed8F" %} 8361 ins_encode %{ 8362 int vector_len = 1; 8363 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8364 %} 8365 ins_pipe( pipe_slow ); 8366 %} 8367 8368 instruct vsqrt8F_mem(vecY dst, memory mem) %{ 8369 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8370 match(Set dst (SqrtVF (LoadVector mem))); 8371 format %{ "vsqrtps $dst,$mem\t! sqrt packed8F" %} 8372 ins_encode %{ 8373 int vector_len = 1; 8374 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len); 8375 %} 8376 ins_pipe( pipe_slow ); 8377 %} 8378 8379 instruct vsqrt16F_reg(vecZ dst, vecZ src) %{ 8380 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8381 match(Set dst (SqrtVF src)); 8382 format %{ "vsqrtps $dst,$src\t! sqrt packed16F" %} 8383 ins_encode %{ 8384 int vector_len = 2; 8385 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8386 %} 8387 ins_pipe( pipe_slow ); 8388 %} 8389 8390 instruct vsqrt16F_mem(vecZ dst, memory mem) %{ 8391 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8392 match(Set dst (SqrtVF (LoadVector mem))); 8393 format %{ "vsqrtps $dst,$mem\t! sqrt packed16F" %} 8394 ins_encode %{ 8395 int vector_len = 2; 8396 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len); 8397 %} 8398 ins_pipe( pipe_slow ); 8399 %} 8400 8401 // ------------------------------ Shift --------------------------------------- 8402 8403 // Left and right shift count vectors are the same on x86 8404 // (only lowest bits of xmm reg are used for count). 8405 instruct vshiftcnt(vecS dst, rRegI cnt) %{ 8406 match(Set dst (LShiftCntV cnt)); 8407 match(Set dst (RShiftCntV cnt)); 8408 format %{ "movdl $dst,$cnt\t! 
load shift count" %} 8409 ins_encode %{ 8410 __ movdl($dst$$XMMRegister, $cnt$$Register); 8411 %} 8412 ins_pipe( pipe_slow ); 8413 %} 8414 8415 instruct vshiftcntimm(vecS dst, immI8 cnt, rRegI tmp) %{ 8416 match(Set dst cnt); 8417 effect(TEMP tmp); 8418 format %{ "movl $tmp,$cnt\t" 8419 "movdl $dst,$tmp\t! load shift count" %} 8420 ins_encode %{ 8421 __ movl($tmp$$Register, $cnt$$constant); 8422 __ movdl($dst$$XMMRegister, $tmp$$Register); 8423 %} 8424 ins_pipe( pipe_slow ); 8425 %} 8426 8427 // Byte vector shift 8428 instruct vshift4B(vecS dst, vecS src, vecS shift, vecS tmp, rRegI scratch) %{ 8429 predicate(UseSSE > 3 && n->as_Vector()->length() == 4); 8430 match(Set dst (LShiftVB src shift)); 8431 match(Set dst (RShiftVB src shift)); 8432 match(Set dst (URShiftVB src shift)); 8433 effect(TEMP dst, TEMP tmp, TEMP scratch); 8434 format %{"vextendbw $tmp,$src\n\t" 8435 "vshiftw $tmp,$shift\n\t" 8436 "movdqu $dst,[0x00ff00ff0x00ff00ff]\n\t" 8437 "pand $dst,$tmp\n\t" 8438 "packuswb $dst,$dst\n\t ! packed4B shift" %} 8439 ins_encode %{ 8440 int opcode = this->as_Mach()->ideal_Opcode(); 8441 8442 __ vextendbw(opcode, $tmp$$XMMRegister, $src$$XMMRegister); 8443 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister); 8444 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 8445 __ pand($dst$$XMMRegister, $tmp$$XMMRegister); 8446 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 8447 %} 8448 ins_pipe( pipe_slow ); 8449 %} 8450 8451 instruct vshift8B(vecD dst, vecD src, vecS shift, vecD tmp, rRegI scratch) %{ 8452 predicate(UseSSE > 3 && n->as_Vector()->length() == 8); 8453 match(Set dst (LShiftVB src shift)); 8454 match(Set dst (RShiftVB src shift)); 8455 match(Set dst (URShiftVB src shift)); 8456 effect(TEMP dst, TEMP tmp, TEMP scratch); 8457 format %{"vextendbw $tmp,$src\n\t" 8458 "vshiftw $tmp,$shift\n\t" 8459 "movdqu $dst,[0x00ff00ff0x00ff00ff]\n\t" 8460 "pand $dst,$tmp\n\t" 8461 "packuswb $dst,$dst\n\t ! packed8B shift" %} 8462 ins_encode %{ 8463 int opcode = this->as_Mach()->ideal_Opcode(); 8464 8465 __ vextendbw(opcode, $tmp$$XMMRegister, $src$$XMMRegister); 8466 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister); 8467 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 8468 __ pand($dst$$XMMRegister, $tmp$$XMMRegister); 8469 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 8470 %} 8471 ins_pipe( pipe_slow ); 8472 %} 8473 8474 instruct vshift16B(vecX dst, vecX src, vecS shift, vecX tmp1, vecX tmp2, rRegI scratch) %{ 8475 predicate(UseSSE > 3 && UseAVX <= 1 && n->as_Vector()->length() == 16); 8476 match(Set dst (LShiftVB src shift)); 8477 match(Set dst (RShiftVB src shift)); 8478 match(Set dst (URShiftVB src shift)); 8479 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); 8480 format %{"vextendbw $tmp1,$src\n\t" 8481 "vshiftw $tmp1,$shift\n\t" 8482 "pshufd $tmp2,$src\n\t" 8483 "vextendbw $tmp2,$tmp2\n\t" 8484 "vshiftw $tmp2,$shift\n\t" 8485 "movdqu $dst,[0x00ff00ff0x00ff00ff]\n\t" 8486 "pand $tmp2,$dst\n\t" 8487 "pand $dst,$tmp1\n\t" 8488 "packuswb $dst,$tmp2\n\t! 
packed16B shift" %} 8489 ins_encode %{ 8490 int opcode = this->as_Mach()->ideal_Opcode(); 8491 8492 __ vextendbw(opcode, $tmp1$$XMMRegister, $src$$XMMRegister); 8493 __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister); 8494 __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE); 8495 __ vextendbw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister); 8496 __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister); 8497 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 8498 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister); 8499 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister); 8500 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister); 8501 %} 8502 ins_pipe( pipe_slow ); 8503 %} 8504 8505 instruct vshift16B_avx(vecX dst, vecX src, vecS shift, vecX tmp, rRegI scratch) %{ 8506 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 8507 match(Set dst (LShiftVB src shift)); 8508 match(Set dst (RShiftVB src shift)); 8509 match(Set dst (URShiftVB src shift)); 8510 effect(TEMP dst, TEMP tmp, TEMP scratch); 8511 format %{"vextendbw $tmp,$src\n\t" 8512 "vshiftw $tmp,$tmp,$shift\n\t" 8513 "vpand $tmp,$tmp,[0x00ff00ff0x00ff00ff]\n\t" 8514 "vextracti128_high $dst,$tmp\n\t" 8515 "vpackuswb $dst,$tmp,$dst\n\t! packed16B shift" %} 8516 ins_encode %{ 8517 int opcode = this->as_Mach()->ideal_Opcode(); 8518 8519 int vector_len = 1; 8520 __ vextendbw(opcode, $tmp$$XMMRegister, $src$$XMMRegister, vector_len); 8521 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len); 8522 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register); 8523 __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister); 8524 __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0); 8525 %} 8526 ins_pipe( pipe_slow ); 8527 %} 8528 8529 instruct vshift32B_avx(vecY dst, vecY src, vecS shift, vecY tmp, rRegI scratch) %{ 8530 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 8531 match(Set dst (LShiftVB src shift)); 8532 match(Set dst (RShiftVB src shift)); 8533 match(Set dst (URShiftVB src shift)); 8534 effect(TEMP dst, TEMP tmp, TEMP scratch); 8535 format %{"vextracti128_high $tmp,$src\n\t" 8536 "vextendbw $tmp,$tmp\n\t" 8537 "vextendbw $dst,$src\n\t" 8538 "vshiftw $tmp,$tmp,$shift\n\t" 8539 "vshiftw $dst,$dst,$shift\n\t" 8540 "vpand $tmp,$tmp,[0x00ff00ff0x00ff00ff]\n\t" 8541 "vpand $dst,$dst,[0x00ff00ff0x00ff00ff]\n\t" 8542 "vpackuswb $dst,$dst,$tmp\n\t" 8543 "vpermq $dst,$dst,0xD8\n\t! 
packed32B shift" %} 8544 ins_encode %{ 8545 int opcode = this->as_Mach()->ideal_Opcode(); 8546 8547 int vector_len = 1; 8548 __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister); 8549 __ vextendbw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 8550 __ vextendbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, vector_len); 8551 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len); 8552 __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vector_len); 8553 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register); 8554 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register); 8555 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 8556 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vector_len); 8557 %} 8558 ins_pipe( pipe_slow ); 8559 %} 8560 8561 instruct vshift64B_avx(vecZ dst, vecZ src, vecS shift, vecZ tmp1, vecZ tmp2, rRegI scratch) %{ 8562 predicate(UseAVX > 2 && n->as_Vector()->length() == 64); 8563 match(Set dst (LShiftVB src shift)); 8564 match(Set dst (RShiftVB src shift)); 8565 match(Set dst (URShiftVB src shift)); 8566 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); 8567 format %{"vextracti64x4 $tmp1,$src\n\t" 8568 "vextendbw $tmp1,$tmp1\n\t" 8569 "vextendbw $tmp2,$src\n\t" 8570 "vshiftw $tmp1,$tmp1,$shift\n\t" 8571 "vshiftw $tmp2,$tmp2,$shift\n\t" 8572 "vmovdqu $dst,[0x00ff00ff0x00ff00ff]\n\t" 8573 "vpbroadcastd $dst,$dst\n\t" 8574 "vpand $tmp1,$tmp1,$dst\n\t" 8575 "vpand $tmp2,$tmp2,$dst\n\t" 8576 "vpackuswb $dst,$tmp1,$tmp2\n\t" 8577 "evmovdquq $tmp2, [0x0604020007050301]\n\t" 8578 "vpermq $dst,$tmp2,$dst\n\t! packed64B shift" %} 8579 ins_encode %{ 8580 int opcode = this->as_Mach()->ideal_Opcode(); 8581 8582 int vector_len = 2; 8583 __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1); 8584 __ vextendbw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len); 8585 __ vextendbw(opcode, $tmp2$$XMMRegister, $src$$XMMRegister, vector_len); 8586 __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vector_len); 8587 __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vector_len); 8588 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 8589 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 8590 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len); 8591 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len); 8592 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vector_len); 8593 __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vector_len, $scratch$$Register); 8594 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len); 8595 %} 8596 ins_pipe( pipe_slow ); 8597 %} 8598 8599 // Shorts vector logical right shift produces incorrect Java result 8600 // for negative data because java code convert short value into int with 8601 // sign extension before a shift. But char vectors are fine since chars are 8602 // unsigned values. 
8603 // Shorts/Chars vector left shift 8604 instruct vshist2S(vecS dst, vecS src, vecS shift) %{ 8605 predicate(n->as_Vector()->length() == 2); 8606 match(Set dst (LShiftVS src shift)); 8607 match(Set dst (RShiftVS src shift)); 8608 match(Set dst (URShiftVS src shift)); 8609 format %{ "vshiftw $dst,$src,$shift\t! shift packed2S" %} 8610 ins_encode %{ 8611 int opcode = this->as_Mach()->ideal_Opcode(); 8612 if (UseAVX == 0) { 8613 if ($dst$$XMMRegister != $src$$XMMRegister) 8614 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 8615 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 8616 } else { 8617 int vector_len = 0; 8618 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8619 } 8620 %} 8621 ins_pipe( pipe_slow ); 8622 %} 8623 8624 instruct vshift4S(vecD dst, vecD src, vecS shift) %{ 8625 predicate(n->as_Vector()->length() == 4); 8626 match(Set dst (LShiftVS src shift)); 8627 match(Set dst (RShiftVS src shift)); 8628 match(Set dst (URShiftVS src shift)); 8629 format %{ "vshiftw $dst,$src,$shift\t! shift packed4S" %} 8630 ins_encode %{ 8631 int opcode = this->as_Mach()->ideal_Opcode(); 8632 if (UseAVX == 0) { 8633 if ($dst$$XMMRegister != $src$$XMMRegister) 8634 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 8635 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 8636 8637 } else { 8638 int vector_len = 0; 8639 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8640 } 8641 %} 8642 ins_pipe( pipe_slow ); 8643 %} 8644 8645 instruct vshift8S(vecX dst, vecX src, vecS shift) %{ 8646 predicate(n->as_Vector()->length() == 8); 8647 match(Set dst (LShiftVS src shift)); 8648 match(Set dst (RShiftVS src shift)); 8649 match(Set dst (URShiftVS src shift)); 8650 format %{ "vshiftw $dst,$src,$shift\t! shift packed8S" %} 8651 ins_encode %{ 8652 int opcode = this->as_Mach()->ideal_Opcode(); 8653 if (UseAVX == 0) { 8654 if ($dst$$XMMRegister != $src$$XMMRegister) 8655 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 8656 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 8657 } else { 8658 int vector_len = 0; 8659 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8660 } 8661 %} 8662 ins_pipe( pipe_slow ); 8663 %} 8664 8665 instruct vshift16S(vecY dst, vecY src, vecS shift) %{ 8666 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 8667 match(Set dst (LShiftVS src shift)); 8668 match(Set dst (RShiftVS src shift)); 8669 match(Set dst (URShiftVS src shift)); 8670 format %{ "vshiftw $dst,$src,$shift\t! shift packed16S" %} 8671 ins_encode %{ 8672 int vector_len = 1; 8673 int opcode = this->as_Mach()->ideal_Opcode(); 8674 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8675 %} 8676 ins_pipe( pipe_slow ); 8677 %} 8678 8679 instruct vshift32S(vecZ dst, vecZ src, vecS shift) %{ 8680 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 8681 match(Set dst (LShiftVS src shift)); 8682 match(Set dst (RShiftVS src shift)); 8683 match(Set dst (URShiftVS src shift)); 8684 format %{ "vshiftw $dst,$src,$shift\t! 
shift packed32S" %} 8685 ins_encode %{ 8686 int vector_len = 2; 8687 int opcode = this->as_Mach()->ideal_Opcode(); 8688 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8689 %} 8690 ins_pipe( pipe_slow ); 8691 %} 8692 8693 // Integers vector left shift 8694 instruct vshift2I(vecD dst, vecD src, vecS shift) %{ 8695 predicate(n->as_Vector()->length() == 2); 8696 match(Set dst (LShiftVI src shift)); 8697 match(Set dst (RShiftVI src shift)); 8698 match(Set dst (URShiftVI src shift)); 8699 format %{ "vshiftd $dst,$src,$shift\t! shift packed2I" %} 8700 ins_encode %{ 8701 int opcode = this->as_Mach()->ideal_Opcode(); 8702 if (UseAVX == 0) { 8703 if ($dst$$XMMRegister != $src$$XMMRegister) 8704 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 8705 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 8706 } else { 8707 int vector_len = 0; 8708 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8709 } 8710 %} 8711 ins_pipe( pipe_slow ); 8712 %} 8713 8714 instruct vshift4I(vecX dst, vecX src, vecS shift) %{ 8715 predicate(n->as_Vector()->length() == 4); 8716 match(Set dst (LShiftVI src shift)); 8717 match(Set dst (RShiftVI src shift)); 8718 match(Set dst (URShiftVI src shift)); 8719 format %{ "vshiftd $dst,$src,$shift\t! shift packed4I" %} 8720 ins_encode %{ 8721 int opcode = this->as_Mach()->ideal_Opcode(); 8722 if (UseAVX == 0) { 8723 if ($dst$$XMMRegister != $src$$XMMRegister) 8724 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 8725 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 8726 } else { 8727 int vector_len = 0; 8728 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8729 } 8730 %} 8731 ins_pipe( pipe_slow ); 8732 %} 8733 8734 instruct vshift8I(vecY dst, vecY src, vecS shift) %{ 8735 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 8736 match(Set dst (LShiftVI src shift)); 8737 match(Set dst (RShiftVI src shift)); 8738 match(Set dst (URShiftVI src shift)); 8739 format %{ "vshiftd $dst,$src,$shift\t! shift packed8I" %} 8740 ins_encode %{ 8741 int vector_len = 1; 8742 int opcode = this->as_Mach()->ideal_Opcode(); 8743 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8744 %} 8745 ins_pipe( pipe_slow ); 8746 %} 8747 8748 instruct vshift16I(vecZ dst, vecZ src, vecS shift) %{ 8749 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8750 match(Set dst (LShiftVI src shift)); 8751 match(Set dst (RShiftVI src shift)); 8752 match(Set dst (URShiftVI src shift)); 8753 format %{ "vshiftd $dst,$src,$shift\t! shift packed16I" %} 8754 ins_encode %{ 8755 int vector_len = 2; 8756 int opcode = this->as_Mach()->ideal_Opcode(); 8757 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8758 %} 8759 ins_pipe( pipe_slow ); 8760 %} 8761 8762 // Longs vector shift 8763 instruct vshift2L(vecX dst, vecX src, vecS shift) %{ 8764 predicate(n->as_Vector()->length() == 2); 8765 match(Set dst (LShiftVL src shift)); 8766 match(Set dst (URShiftVL src shift)); 8767 format %{ "vshiftq $dst,$src,$shift\t! 

// Longs vector shift
instruct vshift2L(vecX dst, vecX src, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  match(Set dst (URShiftVL src shift));
  format %{ "vshiftq $dst,$src,$shift\t! shift packed2L" %}
  ins_encode %{
    int opcode = this->as_Mach()->ideal_Opcode();
    if (UseAVX == 0) {
      if ($dst$$XMMRegister != $src$$XMMRegister)
        __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
      __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
    } else {
      int vector_len = 0;
      __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift4L(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  match(Set dst (URShiftVL src shift));
  format %{ "vshiftq $dst,$src,$shift\t! shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    int opcode = this->as_Mach()->ideal_Opcode();
    __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift8L(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  match(Set dst (RShiftVL src shift));
  match(Set dst (URShiftVL src shift));
  format %{ "vshiftq $dst,$src,$shift\t! shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    int opcode = this->as_Mach()->ideal_Opcode();
    __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// -------------------ArithmeticRightShift -----------------------------------
// Long vector arithmetic right shift
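// Note: SSE2/AVX2 provide no packed arithmetic right shift for 64-bit lanes
// (a packed psraq exists only with AVX-512, see the _evex rules below), so
// vsra2L_reg/vsra4L_reg emulate it with a logical shift plus a sign-mask
// fixup. A rough scalar sketch of the same identity, for 0 <= s <= 63
// (illustrative only, not part of the generated code):
//
//   int64_t sra64(int64_t x, int s) {
//     uint64_t t = (uint64_t)x >> s;                   // logical shift
//     uint64_t m = UINT64_C(0x8000000000000000) >> s;  // shifted sign bit
//     return (int64_t)((t ^ m) - m);                   // xor/sub sign-extends
//   }
//
// The psrlq/pxor/psubq sequences below apply this per 64-bit lane, with the
// 0x8000000000000000 constant loaded from vector_long_sign_mask().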
instruct vsra2L_reg(vecX dst, vecX src, vecS shift, vecX tmp, rRegI scratch) %{
  predicate(UseSSE >= 2 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  effect(TEMP dst, TEMP tmp, TEMP scratch);
  format %{ "movdqu $dst,$src\n\t"
            "psrlq $dst,$shift\n\t"
            "movdqu $tmp,[0x8000000000000000]\n\t"
            "psrlq $tmp,$shift\n\t"
            "pxor $dst,$tmp\n\t"
            "psubq $dst,$tmp\t! arithmetic right shift packed2L" %}
  ins_encode %{
    __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
    __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register);
    __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
    __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
    __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2L_reg_evex(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  format %{ "evpsraq $dst,$src,$shift\t! arithmetic right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4L_reg(vecY dst, vecY src, vecS shift, vecY tmp, rRegI scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVL src shift));
  effect(TEMP dst, TEMP tmp, TEMP scratch);
  format %{ "vpsrlq $dst,$src,$shift\n\t"
            "vmovdqu $tmp,[0x8000000000000000]\n\t"
            "vpsrlq $tmp,$tmp,$shift\n\t"
            "vpxor $dst,$dst,$tmp\n\t"
            "vpsubq $dst,$dst,$tmp\t! arithmetic right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
    __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register);
    __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4L_reg_evex(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVL src shift));
  format %{ "evpsraq $dst,$src,$shift\t! arithmetic right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- AND --------------------------------------
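// Note: the AND/OR/XOR rules below are element-size agnostic, since
// pand/por/pxor operate on raw bits; that is why their predicates test
// length_in_bytes() rather than an element count. The UseAVX == 0 forms are
// destructive (dst op= src) to match the two-operand SSE encodings, while
// the _reg forms use the non-destructive three-operand VEX encodings.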
instruct vand4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (4 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (8 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (16 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- OR ---------------------------------------

instruct vor4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (4 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (8 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (16 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- XOR --------------------------------------
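// Note: throughout the AND/OR/XOR sections, the _mem variants fold the
// second operand's LoadVector directly into the VEX-encoded instruction,
// avoiding a separate vector load and the temporary register it would
// otherwise occupy.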

instruct vxor4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- ABS --------------------------------------
// a = |a|
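// Note: pabsb/pabsw/pabsd (SSSE3) and their vpabs*/evpabsq AVX/AVX-512
// forms compute a per-lane two's-complement absolute value. As with Java's
// Math.abs, the most negative value maps to itself (for example, a byte
// lane holding -128 comes out as 0x80 again).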
"vpabsw $dst,$src\t# $dst = |$src| abs packed32S" %} 9399 ins_encode %{ 9400 int vector_len = 2; 9401 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vector_len); 9402 %} 9403 ins_pipe( pipe_slow ); 9404 %} 9405 9406 instruct vabs2I_reg(vecD dst, vecD src) %{ 9407 predicate(UseSSE > 2 && n->as_Vector()->length() == 2); 9408 match(Set dst (AbsVI src)); 9409 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packed2I" %} 9410 ins_encode %{ 9411 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 9412 %} 9413 ins_pipe( pipe_slow ); 9414 %} 9415 9416 instruct vabs4I_reg(vecX dst, vecX src) %{ 9417 predicate(UseSSE > 2 && n->as_Vector()->length() == 4); 9418 match(Set dst (AbsVI src)); 9419 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packed4I" %} 9420 ins_encode %{ 9421 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 9422 %} 9423 ins_pipe( pipe_slow ); 9424 %} 9425 9426 instruct vabs8I_reg(vecY dst, vecY src) %{ 9427 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 9428 match(Set dst (AbsVI src)); 9429 format %{ "vpabsd $dst,$src\t# $dst = |$src| abs packed8I" %} 9430 ins_encode %{ 9431 int vector_len = 1; 9432 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 9433 %} 9434 ins_pipe( pipe_slow ); 9435 %} 9436 9437 instruct vabs16I_reg(vecZ dst, vecZ src) %{ 9438 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 9439 match(Set dst (AbsVI src)); 9440 format %{ "vpabsd $dst,$src\t# $dst = |$src| abs packed16I" %} 9441 ins_encode %{ 9442 int vector_len = 2; 9443 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 9444 %} 9445 ins_pipe( pipe_slow ); 9446 %} 9447 9448 instruct vabs2L_reg(vecX dst, vecX src) %{ 9449 predicate(UseAVX > 2 && n->as_Vector()->length() == 2); 9450 match(Set dst (AbsVL src)); 9451 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packed2L" %} 9452 ins_encode %{ 9453 int vector_len = 0; 9454 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vector_len); 9455 %} 9456 ins_pipe( pipe_slow ); 9457 %} 9458 9459 instruct vabs4L_reg(vecY dst, vecY src) %{ 9460 predicate(UseAVX > 2 && n->as_Vector()->length() == 4); 9461 match(Set dst (AbsVL src)); 9462 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packed4L" %} 9463 ins_encode %{ 9464 int vector_len = 1; 9465 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vector_len); 9466 %} 9467 ins_pipe( pipe_slow ); 9468 %} 9469 9470 instruct vabs8L_reg(vecZ dst, vecZ src) %{ 9471 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 9472 match(Set dst (AbsVL src)); 9473 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packed8L" %} 9474 ins_encode %{ 9475 int vector_len = 2; 9476 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vector_len); 9477 %} 9478 ins_pipe( pipe_slow ); 9479 %} 9480 9481 // --------------------------------- ABSNEG -------------------------------------- 9482 9483 instruct vabsneg2D(vecX dst, vecX src, rRegI scratch) %{ 9484 predicate(UseSSE >= 2 && n->as_Vector()->length() == 2); 9485 match(Set dst (AbsVD src)); 9486 match(Set dst (NegVD src)); 9487 effect(TEMP scratch); 9488 format %{ "vabsnegd $dst,$src,[mask]\t# absneg packed2D" %} 9489 ins_encode %{ 9490 int opcode = this->as_Mach()->ideal_Opcode(); 9491 if ($dst$$XMMRegister != $src$$XMMRegister) 9492 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 9493 __ vabsnegd(opcode, $dst$$XMMRegister, $scratch$$Register); 9494 %} 9495 ins_pipe( pipe_slow ); 9496 %} 9497 9498 instruct vabsneg4D(vecY dst, vecY src, rRegI scratch) %{ 9499 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 9500 match(Set dst (AbsVD src)); 9501 match(Set 
instruct vabsneg2D(vecX dst, vecX src, rRegI scratch) %{
  predicate(UseSSE >= 2 && n->as_Vector()->length() == 2);
  match(Set dst (AbsVD src));
  match(Set dst (NegVD src));
  effect(TEMP scratch);
  format %{ "vabsnegd $dst,$src,[mask]\t# absneg packed2D" %}
  ins_encode %{
    int opcode = this->as_Mach()->ideal_Opcode();
    if ($dst$$XMMRegister != $src$$XMMRegister)
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    __ vabsnegd(opcode, $dst$$XMMRegister, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsneg4D(vecY dst, vecY src, rRegI scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AbsVD src));
  match(Set dst (NegVD src));
  effect(TEMP scratch);
  format %{ "vabsnegd $dst,$src,[mask]\t# absneg packed4D" %}
  ins_encode %{
    int opcode = this->as_Mach()->ideal_Opcode();
    int vector_len = 1;
    __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsneg8D(vecZ dst, vecZ src, rRegI scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AbsVD src));
  match(Set dst (NegVD src));
  effect(TEMP scratch);
  format %{ "vabsnegd $dst,$src,[mask]\t# absneg packed8D" %}
  ins_encode %{
    int opcode = this->as_Mach()->ideal_Opcode();
    int vector_len = 2;
    __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsneg2F(vecD dst, vecD src, rRegI scratch) %{
  predicate(UseSSE > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AbsVF src));
  match(Set dst (NegVF src));
  effect(TEMP scratch);
  format %{ "vabsnegf $dst,$src,[mask]\t# absneg packed2F" %}
  ins_encode %{
    int opcode = this->as_Mach()->ideal_Opcode();
    if ($dst$$XMMRegister != $src$$XMMRegister)
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    __ vabsnegf(opcode, $dst$$XMMRegister, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsneg4F(vecX dst, rRegI scratch) %{
  predicate(UseSSE > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AbsVF dst));
  match(Set dst (NegVF dst));
  effect(TEMP scratch);
  format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
  ins_cost(150);
  ins_encode %{
    int opcode = this->as_Mach()->ideal_Opcode();
    __ vabsnegf(opcode, $dst$$XMMRegister, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsneg8F(vecY dst, vecY src, rRegI scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AbsVF src));
  match(Set dst (NegVF src));
  effect(TEMP scratch);
  format %{ "vabsnegf $dst,$src,[mask]\t# absneg packed8F" %}
  ins_cost(150);
  ins_encode %{
    int opcode = this->as_Mach()->ideal_Opcode();
    int vector_len = 1;
    __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsneg16F(vecZ dst, vecZ src, rRegI scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AbsVF src));
  match(Set dst (NegVF src));
  effect(TEMP scratch);
  format %{ "vabsnegf $dst,$src,[mask]\t# absneg packed16F" %}
  ins_cost(150);
  ins_encode %{
    int opcode = this->as_Mach()->ideal_Opcode();
    int vector_len = 2;
    __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- FMA --------------------------------------
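// Note: the FMA rules below match (Set c (FmaVD c (Binary a b))), i.e. the
// accumulator $c is both an input and the result: c = a * b + c. That shape
// lines up with the three-operand x86 fused-multiply-add encodings, which
// overwrite one of their sources, so vfmad/vfmaf are handed $c as both the
// destination and the addend.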

// a * b + c
instruct vfma2D_reg(vecX a, vecX b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed2D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma2D_mem(vecX a, memory b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed2D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4D_reg(vecY a, vecY b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed4D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4D_mem(vecY a, memory b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed4D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8D_reg(vecZ a, vecZ b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed8D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8D_mem(vecZ a, memory b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed8D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4F_reg(vecX a, vecX b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed4F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4F_mem(vecX a, memory b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed4F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8F_reg(vecY a, vecY b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed8F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8F_mem(vecY a, memory b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed8F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma16F_reg(vecZ a, vecZ b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 16);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed16F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma16F_mem(vecZ a, memory b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 16);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed16F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Vector Multiply Add --------------------------------------
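// Note: MulAddVS2VI maps to pmaddwd/vpmaddwd, which multiplies adjacent
// pairs of signed 16-bit lanes and sums each pair into one signed 32-bit
// lane. Roughly, per output lane i (illustrative only):
//
//   dst[i] = (int32_t)src1[2*i] * src2[2*i]
//          + (int32_t)src1[2*i+1] * src2[2*i+1];
//
// hence the packed4Sto2I / packed8Sto4I naming: four shorts in, two ints out.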

instruct smuladd4S2I_reg(vecD dst, vecD src1) %{
  predicate(UseSSE >= 2 && UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulAddVS2VI dst src1));
  format %{ "pmaddwd $dst,$dst,$src1\t! muladd packed4Sto2I" %}
  ins_encode %{
    __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuladd4S2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packed4Sto2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct smuladd8S4I_reg(vecX dst, vecX src1) %{
  predicate(UseSSE >= 2 && UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulAddVS2VI dst src1));
  format %{ "pmaddwd $dst,$dst,$src1\t! muladd packed8Sto4I" %}
  ins_encode %{
    __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuladd8S4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packed8Sto4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuladd16S8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packed16Sto8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuladd32S16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packed32Sto16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Vector Multiply Add Add ----------------------------------
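// Note: with AVX512_VNNI (VM_Version::supports_vnni()), evpdpwssd fuses the
// pmaddwd multiply-add with the subsequent 32-bit accumulation, computing
// dst += pmaddwd(src1, src2) in a single instruction. The low ins_cost(10)
// helps the matcher prefer this fused rule over the separate
// MulAddVS2VI-plus-AddVI pair it replaces.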

instruct vmuladdadd4S2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_vnni() && UseAVX > 2 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packed4Sto2I" %}
  ins_encode %{
    int vector_len = 0;
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
  ins_cost(10);
%}

instruct vmuladdadd8S4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_vnni() && UseAVX > 2 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packed8Sto4I" %}
  ins_encode %{
    int vector_len = 0;
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
  ins_cost(10);
%}

instruct vmuladdadd16S8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_vnni() && UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packed16Sto8I" %}
  ins_encode %{
    int vector_len = 1;
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
  ins_cost(10);
%}

instruct vmuladdadd32S16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_vnni() && UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packed32Sto16I" %}
  ins_encode %{
    int vector_len = 2;
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
  ins_cost(10);
%}

// --------------------------------- PopCount --------------------------------------

instruct vpopcount2I(vecD dst, vecD src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 2);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount4I(vecX dst, vecX src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 4);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount8I(vecY dst, vecY src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 8);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount16I(vecZ dst, vecZ src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 16);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}